diff --git a/configure/setup.py b/configure/setup.py
index 2c523240..c7e0bb6e 100644
--- a/configure/setup.py
+++ b/configure/setup.py
@@ -34,6 +34,7 @@
     "absl_py",
     "wheel",
     "wurlitzer",
+    "tf_keras",
 ]

diff --git a/documentation/known_issues.md b/documentation/known_issues.md
index d08b7a35..fd4e0ef6 100644
--- a/documentation/known_issues.md
+++ b/documentation/known_issues.md
@@ -90,7 +90,7 @@ to the estimator format.

 While abstracted by the Keras API, a model instantiated in Python (e.g., with
 `tfdf.keras.RandomForestModel()`) and a model loaded from disk (e.g., with
-`tf.keras.models.load_model()`) can behave differently. Notably, a Python
+`tf_keras.models.load_model()`) can behave differently. Notably, a Python
 instantiated model automatically applies necessary type conversions. For
 example, if a `float64` feature is fed to a model expecting a `float32`
 feature, this conversion is performed implicitly. However, such a conversion is not

diff --git a/documentation/migration.md b/documentation/migration.md
index 325b4c72..79b5068b 100644
--- a/documentation/migration.md
+++ b/documentation/migration.md
@@ -352,18 +352,18 @@ dataset reads are deterministic as well.

 #### Specify a task (e.g. classification, ranking) instead of a loss (e.g. binary cross-entropy)

 ```diff {.bad}
-- model = tf.keras.Sequential()
+- model = tf_keras.Sequential()
 - model.add(Dense(64, activation=relu))
 - model.add(Dense(1)) # One output for binary classification
-- model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+- model.compile(loss=tf_keras.losses.BinaryCrossentropy(from_logits=True),
 -               optimizer='adam',
 -               metrics=['accuracy'])
 ```

 ```diff {.good}
 # The loss is automatically determined from the task.
-+ model = tfdf.keras.GradientBoostedTreesModel(task=tf.keras.Task.CLASSIFICATION)
++ model = tfdf.keras.GradientBoostedTreesModel(task=tfdf.keras.Task.CLASSIFICATION)

 # Optional if you want to report the accuracy.
 + model.compile(metrics=['accuracy'])
 ```
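As a reference for the migration.md pattern above, a minimal end-to-end sketch of the task-based API (the CSV path and label name are illustrative; note that the task enum lives in `tfdf.keras.Task`, not in `tf_keras`):

```python
import pandas as pd
import tensorflow_decision_forests as tfdf

# Hypothetical dataframe with a binary "income" label column.
train_df = pd.read_csv("/tmp/train.csv")
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="income")

# The task (not a loss) tells TF-DF what to optimize.
model = tfdf.keras.GradientBoostedTreesModel(task=tfdf.keras.Task.CLASSIFICATION)
model.compile(metrics=["accuracy"])  # Optional, for reporting only.
model.fit(train_ds)
```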
diff --git a/documentation/tf_df_in_tf_js.md b/documentation/tf_df_in_tf_js.md
index 0ef67888..98d46488 100644
--- a/documentation/tf_df_in_tf_js.md
+++ b/documentation/tf_df_in_tf_js.md
@@ -45,9 +45,10 @@ import tensorflow as tf
 import tensorflow_decision_forests as tfdf
 import tensorflowjs as tfjs
 from google.colab import files
+import tf_keras

 # Load the model with Keras
-model = tf.keras.models.load_model("/tmp/my_saved_model/")
+model = tf_keras.models.load_model("/tmp/my_saved_model/")

 # Convert the keras model to TensorFlow.js
 tfjs.converters.tf_saved_model_conversion_v2.convert_keras_model_to_graph_model(model, "./tfjs_model")

diff --git a/documentation/tutorials/advanced_colab.ipynb b/documentation/tutorials/advanced_colab.ipynb
index 9ffa076b..6a7d151a 100644
--- a/documentation/tutorials/advanced_colab.ipynb
+++ b/documentation/tutorials/advanced_colab.ipynb
@@ -639,7 +639,7 @@
    },
    "outputs": [],
    "source": [
-    "manual_model = tf.keras.models.load_model(\"/tmp/manual_model\")"
+    "manual_model = tf_keras.models.load_model(\"/tmp/manual_model\")"
    ]
   },
   {

diff --git a/documentation/tutorials/beginner_colab.ipynb b/documentation/tutorials/beginner_colab.ipynb
index a5beb153..c39baa6d 100644
--- a/documentation/tutorials/beginner_colab.ipynb
+++ b/documentation/tutorials/beginner_colab.ipynb
@@ -1078,16 +1078,16 @@
    "source": [
     "%set_cell_height 300\n",
     "\n",
-    "body_mass_g = tf.keras.layers.Input(shape=(1,), name=\"body_mass_g\")\n",
+    "body_mass_g = tf_keras.layers.Input(shape=(1,), name=\"body_mass_g\")\n",
     "body_mass_kg = body_mass_g / 1000.0\n",
     "\n",
-    "bill_length_mm = tf.keras.layers.Input(shape=(1,), name=\"bill_length_mm\")\n",
+    "bill_length_mm = tf_keras.layers.Input(shape=(1,), name=\"bill_length_mm\")\n",
     "\n",
     "raw_inputs = {\"body_mass_g\": body_mass_g, \"bill_length_mm\": bill_length_mm}\n",
     "processed_inputs = {\"body_mass_kg\": body_mass_kg, \"bill_length_mm\": bill_length_mm}\n",
     "\n",
     "# \"preprocessor\" contains the preprocessing logic.\n",
-    "preprocessor = tf.keras.Model(inputs=raw_inputs, outputs=processed_inputs)\n",
+    "preprocessor = tf_keras.Model(inputs=raw_inputs, outputs=processed_inputs)\n",
     "\n",
     "# \"model_4\" contains both the pre-processing logic and the decision forest.\n",
     "model_4 = tfdf.keras.RandomForestModel(preprocessing=preprocessor)\n",
@@ -1122,7 +1122,7 @@
     "  tf.feature_column.numeric_column(\"bill_length_mm\"),\n",
     "]\n",
     "\n",
-    "preprocessing = tf.keras.layers.DenseFeatures(feature_columns)\n",
+    "preprocessing = tf_keras.layers.DenseFeatures(feature_columns)\n",
     "\n",
     "model_5 = tfdf.keras.RandomForestModel(preprocessing=preprocessing)\n",
     "model_5.fit(train_ds)"
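The beginner-notebook hunk above wires a `tf_keras` functional model in front of a TF-DF model. A condensed, self-contained sketch of that pattern (feature names follow the Penguins example used in the notebook):

```python
import tensorflow_decision_forests as tfdf
import tf_keras

# Raw inputs, keyed by feature name.
body_mass_g = tf_keras.layers.Input(shape=(1,), name="body_mass_g")
bill_length_mm = tf_keras.layers.Input(shape=(1,), name="bill_length_mm")

# A functional model that rescales grams to kilograms.
preprocessor = tf_keras.Model(
    inputs={"body_mass_g": body_mass_g, "bill_length_mm": bill_length_mm},
    outputs={
        "body_mass_kg": body_mass_g / 1000.0,
        "bill_length_mm": bill_length_mm,
    },
)

# The forest consumes the preprocessed features; fit as usual afterwards.
model = tfdf.keras.RandomForestModel(preprocessing=preprocessor)
```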
diff --git a/documentation/tutorials/intermediate_colab.ipynb b/documentation/tutorials/intermediate_colab.ipynb
index 30d256fe..f19156ab 100644
--- a/documentation/tutorials/intermediate_colab.ipynb
+++ b/documentation/tutorials/intermediate_colab.ipynb
@@ -421,12 +421,12 @@
     "hub_url = \"https://tfhub.dev/google/universal-sentence-encoder/4\"\n",
     "embedding = hub.KerasLayer(hub_url)\n",
     "\n",
-    "sentence = tf.keras.layers.Input(shape=(), name=\"sentence\", dtype=tf.string)\n",
+    "sentence = tf_keras.layers.Input(shape=(), name=\"sentence\", dtype=tf.string)\n",
     "embedded_sentence = embedding(sentence)\n",
     "\n",
     "raw_inputs = {\"sentence\": sentence}\n",
     "processed_inputs = {\"embedded_sentence\": embedded_sentence}\n",
-    "preprocessor = tf.keras.Model(inputs=raw_inputs, outputs=processed_inputs)\n",
+    "preprocessor = tf_keras.Model(inputs=raw_inputs, outputs=processed_inputs)\n",
     "\n",
     "model_2 = tfdf.keras.RandomForestModel(\n",
     "    preprocessing=preprocessor,\n",
@@ -621,8 +621,8 @@
    },
    "outputs": [],
    "source": [
-    "input_1 = tf.keras.Input(shape=(1,), name=\"bill_length_mm\", dtype=\"float\")\n",
-    "input_2 = tf.keras.Input(shape=(1,), name=\"island\", dtype=\"string\")\n",
+    "input_1 = tf_keras.Input(shape=(1,), name=\"bill_length_mm\", dtype=\"float\")\n",
+    "input_2 = tf_keras.Input(shape=(1,), name=\"island\", dtype=\"string\")\n",
     "\n",
     "nn_raw_inputs = [input_1, input_2]"
    ]
   },
@@ -645,9 +645,9 @@
    "outputs": [],
    "source": [
     "# Normalization.\n",
-    "Normalization = tf.keras.layers.Normalization\n",
-    "CategoryEncoding = tf.keras.layers.CategoryEncoding\n",
-    "StringLookup = tf.keras.layers.StringLookup\n",
+    "Normalization = tf_keras.layers.Normalization\n",
+    "CategoryEncoding = tf_keras.layers.CategoryEncoding\n",
+    "StringLookup = tf_keras.layers.StringLookup\n",
     "\n",
     "values = train_ds_pd[\"bill_length_mm\"].values[:, tf.newaxis]\n",
     "input_1_normalizer = Normalization()\n",
@@ -682,15 +682,15 @@
    },
    "outputs": [],
    "source": [
-    "y = tf.keras.layers.Concatenate()(nn_processed_inputs)\n",
-    "y = tf.keras.layers.Dense(16, activation=tf.nn.relu6)(y)\n",
-    "last_layer = tf.keras.layers.Dense(8, activation=tf.nn.relu, name=\"last\")(y)\n",
+    "y = tf_keras.layers.Concatenate()(nn_processed_inputs)\n",
+    "y = tf_keras.layers.Dense(16, activation=tf.nn.relu6)(y)\n",
+    "last_layer = tf_keras.layers.Dense(8, activation=tf.nn.relu, name=\"last\")(y)\n",
     "\n",
     "# \"3\" for the three label classes. If it were a binary classification, the\n",
     "# output dim would be 1.\n",
-    "classification_output = tf.keras.layers.Dense(3)(y)\n",
+    "classification_output = tf_keras.layers.Dense(3)(y)\n",
     "\n",
-    "nn_model = tf.keras.models.Model(nn_raw_inputs, classification_output)"
+    "nn_model = tf_keras.models.Model(nn_raw_inputs, classification_output)"
    ]
   },
   {
@@ -714,7 +714,7 @@
    "source": [
     "# To reduce the risk of mistakes, group both the decision forest and the\n",
     "# neural network in a single keras model.\n",
-    "nn_without_head = tf.keras.models.Model(inputs=nn_model.inputs, outputs=last_layer)\n",
+    "nn_without_head = tf_keras.models.Model(inputs=nn_model.inputs, outputs=last_layer)\n",
     "df_and_nn_model = tfdf.keras.RandomForestModel(preprocessing=nn_without_head)"
    ]
   },
@@ -740,8 +740,8 @@
     "%set_cell_height 300\n",
     "\n",
     "nn_model.compile(\n",
-    "    optimizer=tf.keras.optimizers.Adam(),\n",
-    "    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
+    "    optimizer=tf_keras.optimizers.Adam(),\n",
+    "    loss=tf_keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
     "    metrics=[\"accuracy\"])\n",
     "\n",
     "nn_model.fit(x=train_ds, validation_data=test_ds, epochs=10)\n",
diff --git a/documentation/tutorials/model_composition_colab.ipynb b/documentation/tutorials/model_composition_colab.ipynb
index 3e717a98..9ae06bdd 100644
--- a/documentation/tutorials/model_composition_colab.ipynb
+++ b/documentation/tutorials/model_composition_colab.ipynb
@@ -414,25 +414,25 @@
    "outputs": [],
    "source": [
     "# Input features.\n",
-    "raw_features = tf.keras.layers.Input(shape=(num_features,))\n",
+    "raw_features = tf_keras.layers.Input(shape=(num_features,))\n",
     "\n",
     "# Stage 1\n",
     "# =======\n",
     "\n",
     "# Common learnable pre-processing\n",
-    "preprocessor = tf.keras.layers.Dense(10, activation=tf.nn.relu6)\n",
+    "preprocessor = tf_keras.layers.Dense(10, activation=tf.nn.relu6)\n",
     "preprocess_features = preprocessor(raw_features)\n",
     "\n",
     "# Stage 2\n",
     "# =======\n",
     "\n",
     "# Model #1: NN\n",
-    "m1_z1 = tf.keras.layers.Dense(5, activation=tf.nn.relu6)(preprocess_features)\n",
-    "m1_pred = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(m1_z1)\n",
+    "m1_z1 = tf_keras.layers.Dense(5, activation=tf.nn.relu6)(preprocess_features)\n",
+    "m1_pred = tf_keras.layers.Dense(1, activation=tf.nn.sigmoid)(m1_z1)\n",
     "\n",
     "# Model #2: NN\n",
-    "m2_z1 = tf.keras.layers.Dense(5, activation=tf.nn.relu6)(preprocess_features)\n",
-    "m2_pred = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(m2_z1)\n",
+    "m2_z1 = tf_keras.layers.Dense(5, activation=tf.nn.relu6)(preprocess_features)\n",
+    "m2_pred = tf_keras.layers.Dense(1, activation=tf.nn.sigmoid)(m2_z1)\n",
     "\n",
     "\n",
     "# Model #3: DF\n",
@@ -460,8 +460,8 @@
     "# Keras Models\n",
     "# ============\n",
     "\n",
-    "ensemble_nn_only = tf.keras.models.Model(raw_features, mean_nn_only)\n",
-    "ensemble_nn_and_df = tf.keras.models.Model(raw_features, mean_nn_and_df)"
+    "ensemble_nn_only = tf_keras.models.Model(raw_features, mean_nn_only)\n",
+    "ensemble_nn_and_df = tf_keras.models.Model(raw_features, mean_nn_and_df)"
    ]
   },
   {
@@ -509,8 +509,8 @@
    "source": [
     "%%time\n",
     "ensemble_nn_only.compile(\n",
-    "    optimizer=tf.keras.optimizers.Adam(),\n",
-    "    loss=tf.keras.losses.BinaryCrossentropy(),\n",
+    "    optimizer=tf_keras.optimizers.Adam(),\n",
+    "    loss=tf_keras.losses.BinaryCrossentropy(),\n",
     "    metrics=[\"accuracy\"])\n",
     "\n",
     "ensemble_nn_only.fit(train_dataset, epochs=20, validation_data=test_dataset)"
    ]
   },
@@ -610,7 +610,7 @@
    "outputs": [],
    "source": [
     "ensemble_nn_and_df.compile(\n",
-    "    loss=tf.keras.losses.BinaryCrossentropy(), metrics=[\"accuracy\"])\n",
+    "    loss=tf_keras.losses.BinaryCrossentropy(), metrics=[\"accuracy\"])\n",
     "\n",
     "evaluation_nn_and_df = ensemble_nn_and_df.evaluate(\n",
     "    test_dataset, return_dict=True)\n",
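The model-composition hunks above average several heads over a shared preprocessing stage. A condensed sketch of the neural-network-only part of that pattern (`num_features` is assumed; the notebook then mixes a TF-DF model into the average the same way):

```python
import tensorflow as tf
import tf_keras

num_features = 10  # Hypothetical feature count.
raw_features = tf_keras.layers.Input(shape=(num_features,))
shared = tf_keras.layers.Dense(10, activation=tf.nn.relu6)(raw_features)

# Two small neural heads over the shared representation.
m1_pred = tf_keras.layers.Dense(1, activation=tf.nn.sigmoid)(
    tf_keras.layers.Dense(5, activation=tf.nn.relu6)(shared))
m2_pred = tf_keras.layers.Dense(1, activation=tf.nn.sigmoid)(
    tf_keras.layers.Dense(5, activation=tf.nn.relu6)(shared))

# Average the heads to form the ensemble prediction.
mean_nn_only = tf_keras.layers.Average()([m1_pred, m2_pred])
ensemble_nn_only = tf_keras.models.Model(raw_features, mean_nn_only)
```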
diff --git a/documentation/tutorials/predict_colab.ipynb b/documentation/tutorials/predict_colab.ipynb
index cd1d7c8f..52958843 100644
--- a/documentation/tutorials/predict_colab.ipynb
+++ b/documentation/tutorials/predict_colab.ipynb
@@ -93,7 +93,7 @@
     "\n",
     "While abstracted by the Keras API, a model instantiated in Python (e.g., with\n",
     "`tfdf.keras.RandomForestModel()`) and a model loaded from disk (e.g., with\n",
-    "`tf.keras.models.load_model()`) can behave differently. Notably, a Python\n",
+    "`tf_keras.models.load_model()`) can behave differently. Notably, a Python\n",
     "instantiated model automatically applies necessary type conversions. For\n",
     "example, if a `float64` feature is fed to a model expecting a `float32` feature,\n",
     "this conversion is performed implicitly. However, such a conversion is not\n",

diff --git a/documentation/tutorials/ranking_colab.ipynb b/documentation/tutorials/ranking_colab.ipynb
index 675154c0..b6c018af 100644
--- a/documentation/tutorials/ranking_colab.ipynb
+++ b/documentation/tutorials/ranking_colab.ipynb
@@ -244,7 +244,7 @@
    },
    "outputs": [],
    "source": [
-    "archive_path = tf.keras.utils.get_file(\"letor.zip\",\n",
+    "archive_path = tf_keras.utils.get_file(\"letor.zip\",\n",
     "  \"https://download.microsoft.com/download/E/7/E/E7EABEF1-4C7B-4E31-ACE5-73927950ED5E/Letor.zip\",\n",
     "  extract=True)\n",
     "\n",

diff --git a/examples/BUILD b/examples/BUILD
index 8de36c81..cf5cbd77 100644
--- a/examples/BUILD
+++ b/examples/BUILD
@@ -14,6 +14,7 @@ py_binary(
         # pandas dep,
         # TensorFlow Python,
         "//tensorflow_decision_forests",
+        # tf_keras dep,
     ],
 )

@@ -28,6 +29,7 @@ py_binary(
         # pandas dep,
         # TensorFlow Python,
         "//tensorflow_decision_forests",
+        # tf_keras dep,
     ],
 )

@@ -43,6 +45,7 @@ py_binary(
         # pandas dep,
         # TensorFlow Python,
         "//tensorflow_decision_forests",
+        # tf_keras dep,
     ],
 )

@@ -56,5 +59,6 @@ py_binary(
         # absl/logging dep,
         # TensorFlow Python,
         "//tensorflow_decision_forests",
+        # tf_keras dep,
     ],
 )

diff --git a/examples/hyperparameter_optimization.py b/examples/hyperparameter_optimization.py
index 23a26028..98318ab0 100644
--- a/examples/hyperparameter_optimization.py
+++ b/examples/hyperparameter_optimization.py
@@ -36,14 +36,14 @@
 import pandas as pd
 import tensorflow as tf
 import tensorflow_decision_forests as tfdf
-
+import tf_keras

 def main(argv):
   if len(argv) > 1:
     raise app.UsageError("Too many command-line arguments.")

   # Download the Adult dataset.
-  dataset_path = tf.keras.utils.get_file(
+  dataset_path = tf_keras.utils.get_file(
       "adult.csv",
       "https://raw.githubusercontent.com/google/yggdrasil-decision-forests/"
       "main/yggdrasil_decision_forests/test_data/dataset/adult.csv")

diff --git a/examples/minimal.py b/examples/minimal.py
index 2f1a1f1e..4c951297 100644
--- a/examples/minimal.py
+++ b/examples/minimal.py
@@ -30,11 +30,11 @@
 """

 from absl import app
-
 import numpy as np
 import pandas as pd
 import tensorflow as tf
 import tensorflow_decision_forests as tfdf
+import tf_keras


 def main(argv):
@@ -42,10 +42,11 @@ def main(argv):
     raise app.UsageError("Too many command-line arguments.")

   # Download the Adult dataset.
-  dataset_path = tf.keras.utils.get_file(
+  dataset_path = tf_keras.utils.get_file(
       "adult.csv",
       "https://raw.githubusercontent.com/google/yggdrasil-decision-forests/"
-      "main/yggdrasil_decision_forests/test_data/dataset/adult.csv")
+      "main/yggdrasil_decision_forests/test_data/dataset/adult.csv",
+  )

   # Load a dataset into a Pandas Dataframe.
   dataset_df = pd.read_csv(dataset_path)  # "df" for Pandas's DataFrame.
@@ -61,8 +62,10 @@ def main(argv):
   test_indices = np.random.rand(len(dataset_df)) < 0.30
   test_ds_pd = dataset_df[test_indices]
   train_ds_pd = dataset_df[~test_indices]
-  print(f"{len(train_ds_pd)} examples in training"
-        f", {len(test_ds_pd)} examples for testing.")
+  print(
+      f"{len(train_ds_pd)} examples in training"
+      f", {len(test_ds_pd)} examples for testing."
+  )

   # Converts datasets from Pandas dataframe to TensorFlow dataset format.
   train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_ds_pd, label="income")
diff --git a/tensorflow_decision_forests/__init__.py b/tensorflow_decision_forests/__init__.py
index c561f2e5..5da90f96 100644
--- a/tensorflow_decision_forests/__init__.py
+++ b/tensorflow_decision_forests/__init__.py
@@ -46,7 +46,7 @@
 # ...

 # Load a model: it loads as a generic keras model.
-loaded_model = tf.keras.models.load_model("/tmp/my_saved_model")
+loaded_model = tf_keras.models.load_model("/tmp/my_saved_model")
 ```
 """

diff --git a/tensorflow_decision_forests/component/builder/builder.py b/tensorflow_decision_forests/component/builder/builder.py
index fb27b755..833e849f 100644
--- a/tensorflow_decision_forests/component/builder/builder.py
+++ b/tensorflow_decision_forests/component/builder/builder.py
@@ -106,7 +106,7 @@
 builder.close()

 # Load and use the model
-model = tf.keras.models.load_model("/path/to/model")
+model = tf_keras.models.load_model("/path/to/model")
 predictions = model.predict(...)
 ```
 """
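The builder docstring patched above ends with the load-and-predict step. For context, a compact round-trip sketch under the `tfdf.builder` / `tfdf.py_tree` aliases that TF-DF exports (label and path are illustrative):

```python
import tensorflow_decision_forests as tfdf
import tf_keras

# Hand-build a one-leaf regression CART and write it as a SavedModel.
builder = tfdf.builder.CARTBuilder(
    path="/tmp/manual_model",
    objective=tfdf.py_tree.objective.RegressionObjective(label="age"),
)
builder.add_tree(
    tfdf.py_tree.tree.Tree(
        tfdf.py_tree.node.LeafNode(
            tfdf.py_tree.value.RegressionValue(value=2.0, num_examples=30)
        )
    )
)
builder.close()

# The result loads back as a generic Keras model.
model = tf_keras.models.load_model("/tmp/manual_model")
```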
diff --git a/tensorflow_decision_forests/component/builder/builder_test.py b/tensorflow_decision_forests/component/builder/builder_test.py
index 33079c96..b24983cd 100644
--- a/tensorflow_decision_forests/component/builder/builder_test.py
+++ b/tensorflow_decision_forests/component/builder/builder_test.py
@@ -25,6 +25,7 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+import tf_keras

 from tensorflow_decision_forests import keras
 from tensorflow_decision_forests.component import py_tree
@@ -48,8 +49,9 @@ def data_root_path() -> str:


 def test_data_path() -> str:
-  return os.path.join(data_root_path(),
-                      "external/ydf/yggdrasil_decision_forests/test_data")
+  return os.path.join(
+      data_root_path(), "external/ydf/yggdrasil_decision_forests/test_data"
+  )


 def tmp_path() -> str:
@@ -94,47 +96,64 @@ def test_classification_random_forest(self, file_prefix, model_name):
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=NonLeafNode(
                   condition=CategoricalIsInCondition(
                       feature=SimpleColumnSpec(
                           name="f2",
-                          type=py_tree.dataspec.ColumnType.CATEGORICAL),
+                          type=py_tree.dataspec.ColumnType.CATEGORICAL,
+                      ),
                       mask=["cat", "dog"],
-                      missing_evaluation=False),
+                      missing_evaluation=False,
+                  ),
                   pos_child=LeafNode(
                       value=ProbabilityValue(
-                          probability=[0.8, 0.1, 0.1], num_examples=10)),
+                          probability=[0.8, 0.1, 0.1], num_examples=10
+                      )
+                  ),
                   neg_child=LeafNode(
                       value=ProbabilityValue(
-                          probability=[0.1, 0.8, 0.1], num_examples=20))),
+                          probability=[0.1, 0.8, 0.1], num_examples=20
+                      )
+                  ),
+              ),
               neg_child=LeafNode(
                   value=ProbabilityValue(
-                      probability=[0.1, 0.1, 0.8], num_examples=30)))))
+                      probability=[0.1, 0.1, 0.8], num_examples=30
+                  )
+              ),
+          )
+      )
+  )

   builder.close()

   if file_prefix is not None:
     self.assertEqual(
         inspector_lib.detect_model_file_prefix(
-            os.path.join(model_path, "assets")), file_prefix)
+            os.path.join(model_path, "assets")
+        ),
+        file_prefix,
+    )

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)
   expected_model_name = (
       "inference_core_model" if model_name is None else model_name
   )
   self.assertEqual(loaded_model.name, expected_model_name)

   logging.info("Make predictions")
-  tf_dataset = tf.data.Dataset.from_tensor_slices({
-      "f1": [1.0, 2.0, 3.0],
-      "f2": ["cat", "cat", "bird"]
-  }).batch(2)
+  tf_dataset = tf.data.Dataset.from_tensor_slices(
+      {"f1": [1.0, 2.0, 3.0], "f2": ["cat", "cat", "bird"]}
+  ).batch(2)
   predictions = loaded_model.predict(tf_dataset)
-  self.assertAllClose(predictions,
-                      [[0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1]])
+  self.assertAllClose(
+      predictions, [[0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1]]
+  )

 @parameterized.parameters((None,), ("",), ("prefix_",))
 def test_classification_cart(self, file_prefix):
@@ -160,44 +179,61 @@ def test_classification_cart(self, file_prefix):
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=NonLeafNode(
                   condition=CategoricalIsInCondition(
                       feature=SimpleColumnSpec(
                           name="f2",
-                          type=py_tree.dataspec.ColumnType.CATEGORICAL),
+                          type=py_tree.dataspec.ColumnType.CATEGORICAL,
+                      ),
                       mask=["cat", "dog"],
-                      missing_evaluation=False),
+                      missing_evaluation=False,
+                  ),
                   pos_child=LeafNode(
                       value=ProbabilityValue(
-                          probability=[0.8, 0.1, 0.1], num_examples=10)),
+                          probability=[0.8, 0.1, 0.1], num_examples=10
+                      )
+                  ),
                   neg_child=LeafNode(
                       value=ProbabilityValue(
-                          probability=[0.1, 0.8, 0.1], num_examples=20))),
+                          probability=[0.1, 0.8, 0.1], num_examples=20
+                      )
+                  ),
+              ),
               neg_child=LeafNode(
                   value=ProbabilityValue(
-                      probability=[0.1, 0.1, 0.8], num_examples=30)))))
+                      probability=[0.1, 0.1, 0.8], num_examples=30
+                  )
+              ),
+          )
+      )
+  )

   builder.close()

   if file_prefix is not None:
     self.assertEqual(
         inspector_lib.detect_model_file_prefix(
-            os.path.join(model_path, "assets")), file_prefix)
+            os.path.join(model_path, "assets")
+        ),
+        file_prefix,
+    )

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)
   self.assertEqual(loaded_model.name, "classification_cart")

   logging.info("Make predictions")
-  tf_dataset = tf.data.Dataset.from_tensor_slices({
-      "f1": [1.0, 2.0, 3.0],
-      "f2": ["cat", "cat", "bird"]
-  }).batch(2)
+  tf_dataset = tf.data.Dataset.from_tensor_slices(
+      {"f1": [1.0, 2.0, 3.0], "f2": ["cat", "cat", "bird"]}
+  ).batch(2)
   predictions = loaded_model.predict(tf_dataset)
-  self.assertAllClose(predictions,
-                      [[0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1]])
+  self.assertAllClose(
+      predictions, [[0.1, 0.1, 0.8], [0.8, 0.1, 0.1], [0.1, 0.8, 0.1]]
+  )

 def test_regression_random_forest(self):
   model_path = os.path.join(tmp_path(), "regression_rf")
@@ -205,7 +241,8 @@ def test_regression_random_forest(self):
   builder = builder_lib.RandomForestBuilder(
       path=model_path,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
-      objective=py_tree.objective.RegressionObjective(label="age"))
+      objective=py_tree.objective.RegressionObjective(label="age"),
+  )

   # f1>=1.5
   # ├─(pos)─ age: 1
   # └─(neg)─ age: 2
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=1, num_examples=30)),
+                  value=RegressionValue(value=1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=2, num_examples=30)))))
+                  value=RegressionValue(value=2, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -243,7 +287,9 @@ def test_regression_random_forest_with_categorical_integer(self):
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
       objective=py_tree.objective.RegressionObjective(label="age"),
       advanced_arguments=builder_lib.AdvancedArguments(
-          disable_categorical_integer_offset_correction=True))
+          disable_categorical_integer_offset_correction=True
+      ),
+  )

   # f1 in [2,3]
   # ├─(pos)─ age: 1
   # └─(neg)─ age: 2
@@ -253,19 +299,25 @@
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=CategoricalIsInCondition(
                   feature=SimpleColumnSpec(
-                      name="f1",
-                      type=py_tree.dataspec.ColumnType.CATEGORICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.CATEGORICAL
+                  ),
                   mask=[2, 3],
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=1, num_examples=30)),
+                  value=RegressionValue(value=1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=2, num_examples=30)))))
+                  value=RegressionValue(value=2, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -296,18 +348,25 @@ def test_binary_classification_gbt(self):
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=+1, num_examples=30)),
+                  value=RegressionValue(value=+1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=-1, num_examples=30)))))
+                  value=RegressionValue(value=-1, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)
   self.assertEqual(loaded_model.name, "binary_classification_gbt")

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -316,7 +375,8 @@
   predictions = loaded_model.predict(tf_dataset)
   self.assertAllClose(
       predictions,
-      [[1.0 / (1.0 + math.exp(0.0))], [1.0 / (1.0 + math.exp(-2.0))]])
+      [[1.0 / (1.0 + math.exp(0.0))], [1.0 / (1.0 + math.exp(-2.0))]],
+  )

 @parameterized.parameters((None,), ("",), ("prefix_",))
 def test_multi_class_classification_gbt(self, file_prefix):
@@ -326,8 +386,10 @@ def test_multi_class_classification_gbt(self, file_prefix):
   builder = builder_lib.GradientBoostedTreeBuilder(
       path=model_path,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
       objective=py_tree.objective.ClassificationObjective(
-          label="color", classes=["red", "blue", "green"]),
-      file_prefix=file_prefix)
+          label="color", classes=["red", "blue", "green"]
+      ),
+      file_prefix=file_prefix,
+  )

   # f1>=1.5
   # ├─(pos)─ +1.0 (toward "red")
   # └─(neg)─ -1.0 (toward "red")
@@ -345,24 +407,33 @@
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1",
-                      type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=threshold,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=+1, num_examples=30)),
+                  value=RegressionValue(value=+1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=-1, num_examples=30)))))
+                  value=RegressionValue(value=-1, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   if file_prefix is not None:
     self.assertEqual(
         inspector_lib.detect_model_file_prefix(
-            os.path.join(model_path, "assets")), file_prefix)
+            os.path.join(model_path, "assets")
+        ),
+        file_prefix,
+    )

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -371,12 +442,17 @@
   predictions = loaded_model.predict(tf_dataset)
   soft_max_sum = np.sum(np.exp([+1, -1, -1]))
-  self.assertAllClose(predictions, [[1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0],
-                                    [
-                                        math.exp(+1) / soft_max_sum,
-                                        math.exp(-1) / soft_max_sum,
-                                        math.exp(-1) / soft_max_sum
-                                    ]])
+  self.assertAllClose(
+      predictions,
+      [
+          [1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0],
+          [
+              math.exp(+1) / soft_max_sum,
+              math.exp(-1) / soft_max_sum,
+              math.exp(-1) / soft_max_sum,
+          ],
+      ],
+  )

 def test_regression_gbt(self):
   model_path = os.path.join(tmp_path(), "regression_gbt")
@@ -385,7 +461,8 @@ def test_regression_gbt(self):
   builder = builder_lib.GradientBoostedTreeBuilder(
       path=model_path,
       bias=1.0,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
-      objective=py_tree.objective.RegressionObjective(label="age"))
+      objective=py_tree.objective.RegressionObjective(label="age"),
+  )

   # bias: 1.0
   # f1>=1.5
@@ -396,18 +473,25 @@
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=+1, num_examples=30)),
+                  value=RegressionValue(value=+1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=-1, num_examples=30)))))
+                  value=RegressionValue(value=-1, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -424,7 +508,9 @@ def test_ranking_gbt(self):
       bias=1.0,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
       objective=py_tree.objective.RankingObjective(
-          label="document", group="query"))
+          label="document", group="query"
+      ),
+  )

   # bias: 1.0
   # f1>=1.5
@@ -435,18 +521,25 @@
   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=NumericalHigherThanCondition(
                   feature=SimpleColumnSpec(
-                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL
+                  ),
                   threshold=1.5,
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
-                  value=RegressionValue(value=+1, num_examples=30)),
+                  value=RegressionValue(value=+1, num_examples=30)
+              ),
               neg_child=LeafNode(
-                  value=RegressionValue(value=-1, num_examples=30)))))
+                  value=RegressionValue(value=-1, num_examples=30)
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({
@@ -457,71 +550,104 @@

 def test_error_empty_path(self):
   self.assertRaises(
-      ValueError, lambda: builder_lib.RandomForestBuilder(
+      ValueError,
+      lambda: builder_lib.RandomForestBuilder(
           path="",
           model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
-          objective=py_tree.objective.RegressionObjective("label")))
+          objective=py_tree.objective.RegressionObjective("label"),
+      ),
+  )

 def test_error_multi_tree_cart(self):
   builder = builder_lib.CARTBuilder(
       path=os.path.join(tmp_path(), "model"),
-      objective=py_tree.objective.RegressionObjective("label"))
+      objective=py_tree.objective.RegressionObjective("label"),
+  )
   builder.add_tree(Tree(LeafNode(RegressionValue(1, 30))))
   self.assertRaises(
       ValueError,
-      lambda: builder.add_tree(Tree(LeafNode(RegressionValue(1, 30)))))
+      lambda: builder.add_tree(Tree(LeafNode(RegressionValue(1, 30)))),
+  )

 def test_error_reg_cart_with_class_tree(self):
   builder = builder_lib.CARTBuilder(
       path=os.path.join(tmp_path(), "model"),
-      objective=py_tree.objective.RegressionObjective("label"))
+      objective=py_tree.objective.RegressionObjective("label"),
+  )
   self.assertRaises(
-      ValueError, lambda: builder.add_tree(
+      ValueError,
+      lambda: builder.add_tree(
           Tree(
               LeafNode(
                   ProbabilityValue(
-                      probability=[0.8, 0.1, 0.1], num_examples=10)))))
+                      probability=[0.8, 0.1, 0.1], num_examples=10
+                  )
+              )
+          )
+      ),
+  )

 def test_error_class_cart_with_reg_tree(self):
   builder = builder_lib.CARTBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["red", "blue"]))
+          "label", classes=["red", "blue"]
+      ),
+  )
   self.assertRaises(
       ValueError,
-      lambda: builder.add_tree(Tree(LeafNode(RegressionValue(1, 10)))))
+      lambda: builder.add_tree(Tree(LeafNode(RegressionValue(1, 10)))),
+  )

 def test_error_wrong_class_leaf_dim(self):
   builder = builder_lib.CARTBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["red", "blue"]))
+          "label", classes=["red", "blue"]
+      ),
+  )
   self.assertRaises(
-      ValueError, lambda: builder.add_tree(
+      ValueError,
+      lambda: builder.add_tree(
          Tree(
              LeafNode(
                  ProbabilityValue(
-                      probability=[0.8, 0.1, 0.1], num_examples=10)))))
+                      probability=[0.8, 0.1, 0.1], num_examples=10
+                  )
+              )
+          )
+      ),
+  )

 def test_error_gbt_with_class_tree(self):
   builder = builder_lib.GradientBoostedTreeBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["red", "blue", "green"]))
+          "label", classes=["red", "blue", "green"]
+      ),
+  )
   self.assertRaises(
-      ValueError, lambda: builder.add_tree(
+      ValueError,
+      lambda: builder.add_tree(
           Tree(
               LeafNode(
                   ProbabilityValue(
-                      probability=[0.8, 0.1, 0.1], num_examples=10)))))
+                      probability=[0.8, 0.1, 0.1], num_examples=10
+                  )
+              )
+          )
+      ),
+  )

 def test_error_gbt_wrong_number_of_trees(self):
   builder = builder_lib.GradientBoostedTreeBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["red", "blue", "green"]))
+          "label", classes=["red", "blue", "green"]
+      ),
+  )
   builder.add_tree(Tree(LeafNode(RegressionValue(1, num_examples=10))))
   self.assertRaises(ValueError, builder.close)

@@ -530,23 +656,33 @@ def test_get_set_dictionary(self):
   builder = builder_lib.RandomForestBuilder(
       path=os.path.join(tmp_path(), "model"),
       objective=py_tree.objective.ClassificationObjective(
-          "label", classes=["true", "false"]))
+          "label", classes=["true", "false"]
+      ),
+  )

   builder.add_tree(
       Tree(
           NonLeafNode(
               condition=CategoricalIsInCondition(
                   feature=SimpleColumnSpec(
-                      name="f1",
-                      type=py_tree.dataspec.ColumnType.CATEGORICAL),
+                      name="f1", type=py_tree.dataspec.ColumnType.CATEGORICAL
+                  ),
                   mask=["x", "y"],
-                  missing_evaluation=False),
+                  missing_evaluation=False,
+              ),
               pos_child=LeafNode(
                   value=ProbabilityValue(
-                      probability=[0.8, 0.2], num_examples=10)),
+                      probability=[0.8, 0.2], num_examples=10
+                  )
+              ),
               neg_child=LeafNode(
                   value=ProbabilityValue(
-                      probability=[0.2, 0.8], num_examples=20)))))
+                      probability=[0.2, 0.8], num_examples=20
+                  )
+              ),
+          )
+      )
+  )

   self.assertEqual(builder.get_dictionary("f1"), ["", "x", "y"])
   builder.set_dictionary("f1", ["", "x", "y", "z"])
@@ -565,17 +701,23 @@ def test_extract_random_forest(self):
   dataset = keras.pd_dataframe_to_tf_dataset(dataframe, "income")

   # Load an inspector to an existing model.
-  src_model_path = os.path.join(test_model_directory(),
-                                "adult_binary_class_rf")
+  src_model_path = os.path.join(
+      test_model_directory(), "adult_binary_class_rf"
+  )
   inspector = inspector_lib.make_inspector(src_model_path)

   # Extract a piece of this model
   def custom_model_input_signature(
-      inspector: inspector_lib.AbstractInspector):
+      inspector: inspector_lib.AbstractInspector,
+  ):
     input_spec = keras.build_default_input_model_signature(inspector)
     # Those features are stored as int64 in the dataset.
     for feature_name in [
-        "age", "fnlwgt", "capital_gain", "capital_loss", "hours_per_week"
+        "age",
+        "fnlwgt",
+        "capital_gain",
+        "capital_loss",
+        "hours_per_week",
     ]:
       input_spec[feature_name] = tf.TensorSpec(shape=[None], dtype=tf.int64)
     return input_spec
@@ -587,7 +729,8 @@ def custom_model_input_signature(
       # Make sure the features and feature dictionaries are the same as in the
       # original model.
       import_dataspec=inspector.dataspec,
-      input_signature_example_fn=custom_model_input_signature)
+      input_signature_example_fn=custom_model_input_signature,
+  )

   # Extract the first 5 trees
   for i in range(5):
@@ -596,7 +739,7 @@ def custom_model_input_signature(

   builder.close()

-  truncated_model = tf.keras.models.load_model(dst_model_path)
+  truncated_model = tf_keras.models.load_model(dst_model_path)
   predictions = truncated_model.predict(dataset)
   self.assertEqual(predictions.shape, (9769, 1))

@@ -607,7 +750,8 @@ def test_fast_serving_with_custom_numerical_default_evaluation(self):
   builder = builder_lib.RandomForestBuilder(
       path=model_path,
       bias=0.0,
       model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
-      objective=py_tree.objective.RegressionObjective(label="label"))
+      objective=py_tree.objective.RegressionObjective(label="label"),
+  )

   # f1>=-1.0 (default: false)
   # │
@@ -627,11 +771,14 @@ def condition(feature, threshold, missing_evaluation, pos, neg):
     return NonLeafNode(
         condition=NumericalHigherThanCondition(
             feature=SimpleColumnSpec(
-                name=feature, type=py_tree.dataspec.ColumnType.NUMERICAL),
+                name=feature, type=py_tree.dataspec.ColumnType.NUMERICAL
+            ),
             threshold=threshold,
-            missing_evaluation=missing_evaluation),
+            missing_evaluation=missing_evaluation,
+        ),
         pos_child=pos,
-        neg_child=neg)
+        neg_child=neg,
+    )

   def leaf(value):
     return LeafNode(RegressionValue(value=value, num_examples=1))
@@ -639,15 +786,20 @@ def leaf(value):
   builder.add_tree(
       Tree(
           condition(
-              "f1", -1.0, False, condition("f1", 2.0, False, leaf(1),
-                                           leaf(2)),
+              "f1",
+              -1.0,
+              False,
+              condition("f1", 2.0, False, leaf(1), leaf(2)),
               condition(
                   "f2",
                   -3.0,
                   True,
                   condition("f2", 4.0, False, leaf(3), leaf(4)),
                   leaf(5),
-              )))
+              ),
+          )
+      )
+  )

   builder.close()

   logging.info("Loading model")
@@ -658,7 +810,7 @@ def leaf(value):
   #
   # TODO:: Add API to check which inference engine is used.
-  loaded_model = tf.keras.models.load_model(model_path)
+  loaded_model = tf_keras.models.load_model(model_path)

   logging.info("Make predictions")
   tf_dataset = tf.data.Dataset.from_tensor_slices({

diff --git a/tensorflow_decision_forests/component/inspector/inspector_test.py b/tensorflow_decision_forests/component/inspector/inspector_test.py
index 8d86f0c2..d03c6452 100644
--- a/tensorflow_decision_forests/component/inspector/inspector_test.py
+++ b/tensorflow_decision_forests/component/inspector/inspector_test.py
@@ -23,6 +23,7 @@
 from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
+import tf_keras

 from tensorflow_decision_forests import keras
 from tensorflow_decision_forests.component import py_tree
@@ -338,7 +339,7 @@ def test_inspect_combined_model(self):
     features = tf.random.uniform(shape=[100, 2], minval=1, maxval=100)
     target = tf.random.uniform(shape=[100, 1], minval=25, maxval=50)
     dataset = tf.data.Dataset.from_tensor_slices((features, target)).batch(32)
-    inputs = tf.keras.Input(shape=(2,))
+    inputs = tf_keras.Input(shape=(2,))
     model_rf = keras.RandomForestModel(num_trees=10, task=keras.Task.REGRESSION)
     model_gbt = keras.GradientBoostedTreesModel(task=keras.Task.REGRESSION)

@@ -347,7 +348,7 @@ def model_gbt_preprocessing(x):
       return tf.concat([model_rf(x), x], axis=1)

     model_gbt_pred = model_gbt(model_gbt_preprocessing(inputs))
-    combined_model = tf.keras.models.Model(inputs, model_gbt_pred)
+    combined_model = tf_keras.models.Model(inputs, model_gbt_pred)

     # Train first model.
     model_rf.fit(dataset)
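A condensed version of the combined-model pattern the inspector test above exercises — the GBT consumes the random forest's prediction as an extra input feature (both sub-models are still trained with their own `fit` calls, as in the test):

```python
import tensorflow as tf
import tensorflow_decision_forests as tfdf
import tf_keras

inputs = tf_keras.Input(shape=(2,))
model_rf = tfdf.keras.RandomForestModel(num_trees=10, task=tfdf.keras.Task.REGRESSION)
model_gbt = tfdf.keras.GradientBoostedTreesModel(task=tfdf.keras.Task.REGRESSION)

# Feed the RF output, concatenated with the raw features, into the GBT.
gbt_inputs = tf.concat([model_rf(inputs), inputs], axis=1)
combined_model = tf_keras.models.Model(inputs, model_gbt(gbt_inputs))
```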
diff --git a/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter.py b/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter.py
index cfed099d..7497736b 100644
--- a/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter.py
+++ b/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter.py
@@ -27,10 +27,12 @@
 from sklearn import tree
 import tensorflow as tf
 import tensorflow_decision_forests as tfdf
+import tf_keras


 class TaskType(enum.Enum):
   """The type of task that a scikit-learn model performs."""
+
   UNKNOWN = 1
   SCALAR_REGRESSION = 2
   SINGLE_LABEL_CLASSIFICATION = 3
@@ -43,7 +45,7 @@ class TaskType(enum.Enum):
 def convert(
     sklearn_model: ScikitLearnModel,
     intermediate_write_path: Optional[os.PathLike] = None,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Converts a tree-based scikit-learn model to a tensorflow model.

   Currently supported models are:
@@ -66,8 +68,8 @@
     process, a TFDF model is written to disk. If intermediate_write_path is
     specified, the TFDF model is written to this directory. Otherwise, a
     temporary directory is created that is immediately removed after this
-    function executes. Note that in order to save the converted model and
-    load it again later, this argument must be provided.
+    function executes. Note that in order to save the converted model and load
+    it again later, this argument must be provided.

   Returns:
     a keras Model that emulates the provided scikit-learn model.
@@ -87,30 +89,35 @@
   # The resultant tfdf model only receives the features that are used
   # to split samples in nodes in the trees as input. But we want to pass the
   # full design matrix as an input to match the scikit-learn API, thus we
-  # create another tf.keras.Model with the desired call signature.
-  template_input = tf.keras.Input(shape=(sklearn_model.n_features_in_,))
+  # create another tf_keras.Model with the desired call signature.
+  template_input = tf_keras.Input(shape=(sklearn_model.n_features_in_,))
   # Extracts the indices of the features that are used by the TFDF model.
   # The features have names with the format "feature_<index>".
-  feature_names = tfdf_model.signatures[
-      "serving_default"].structured_input_signature[1].keys()
+  feature_names = (
+      tfdf_model.signatures["serving_default"]
+      .structured_input_signature[1]
+      .keys()
+  )
   template_output = tfdf_model(
-      {i: template_input[:, int(i.split("_")[1])] for i in feature_names})
-  return tf.keras.Model(inputs=template_input, outputs=template_output)
+      {i: template_input[:, int(i.split("_")[1])] for i in feature_names}
+  )
+  return tf_keras.Model(inputs=template_input, outputs=template_output)


 @functools.singledispatch
 def _build_tfdf_model(
     sklearn_model: ScikitLearnModel,
     path: os.PathLike,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Builds a TFDF model from the given scikit-learn model."""
   raise NotImplementedError(
-      f"Can't build a TFDF model for {type(sklearn_model)}")
+      f"Can't build a TFDF model for {type(sklearn_model)}"
+  )


 @_build_tfdf_model.register(tree.DecisionTreeRegressor)
 @_build_tfdf_model.register(tree.ExtraTreeRegressor)
-def _(sklearn_model: ScikitLearnTree, path: os.PathLike) -> tf.keras.Model:
+def _(sklearn_model: ScikitLearnTree, path: os.PathLike) -> tf_keras.Model:
   """Converts a single scikit-learn regression tree to a TFDF model."""
   # The label argument is unused when the model is loaded, so we pass a
   # placeholder.
@@ -119,12 +126,12 @@
   cart_builder = tfdf.builder.CARTBuilder(path=path, objective=objective)
   cart_builder.add_tree(pytree)
   cart_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 @_build_tfdf_model.register(tree.DecisionTreeClassifier)
 @_build_tfdf_model.register(tree.ExtraTreeClassifier)
-def _(sklearn_model: ScikitLearnTree, path: os.PathLike) -> tf.keras.Model:
+def _(sklearn_model: ScikitLearnTree, path: os.PathLike) -> tf_keras.Model:
   """Converts a single scikit-learn classification tree to a TFDF model."""
   objective = tfdf.py_tree.objective.ClassificationObjective(
       label="label",
@@ -136,30 +143,34 @@
   cart_builder = tfdf.builder.CARTBuilder(path=path, objective=objective)
   cart_builder.add_tree(pytree)
   cart_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 @_build_tfdf_model.register(ensemble.ExtraTreesRegressor)
 @_build_tfdf_model.register(ensemble.RandomForestRegressor)
 def _(
-    sklearn_model: Union[ensemble.ExtraTreesRegressor, ensemble.RandomForestRegressor],
+    sklearn_model: Union[
+        ensemble.ExtraTreesRegressor, ensemble.RandomForestRegressor
+    ],
     path: os.PathLike,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Converts a forest regression model into a TFDF model."""
   objective = tfdf.py_tree.objective.RegressionObjective(label="label")
   rf_builder = tfdf.builder.RandomForestBuilder(path=path, objective=objective)
   for single_tree in sklearn_model.estimators_:
     rf_builder.add_tree(convert_sklearn_tree_to_tfdf_pytree(single_tree))
   rf_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 @_build_tfdf_model.register(ensemble.ExtraTreesClassifier)
 @_build_tfdf_model.register(ensemble.RandomForestClassifier)
 def _(
-    sklearn_model: Union[ensemble.ExtraTreesClassifier, ensemble.RandomForestClassifier],
+    sklearn_model: Union[
+        ensemble.ExtraTreesClassifier, ensemble.RandomForestClassifier
+    ],
     path: os.PathLike,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Converts a forest classification model into a TFDF model."""
   objective = tfdf.py_tree.objective.ClassificationObjective(
       label="label",
@@ -169,14 +180,14 @@
   for single_tree in sklearn_model.estimators_:
     rf_builder.add_tree(convert_sklearn_tree_to_tfdf_pytree(single_tree))
   rf_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 @_build_tfdf_model.register(ensemble.GradientBoostingRegressor)
 def _(
     sklearn_model: ensemble.GradientBoostingRegressor,
     path: os.PathLike,
-) -> tf.keras.Model:
+) -> tf_keras.Model:
   """Converts a gradient boosting regression model into a TFDF model."""
   if isinstance(sklearn_model.init_, dummy.DummyRegressor):
     # If the initial estimator is a DummyRegressor, then it predicts a constant
@@ -194,9 +205,11 @@
     init_pytree = None
     bias = 0.0
   else:
-    raise ValueError("The initial estimator must be either a DummyRegressor"
-                     "or a DecisionTreeRegressor, but got"
-                     f"{type(sklearn_model.init_)}.")
+    raise ValueError(
+        "The initial estimator must be either a DummyRegressor "
+        "or a DecisionTreeRegressor, but got "
+        f"{type(sklearn_model.init_)}."
+    )

   gbt_builder = tfdf.builder.GradientBoostedTreeBuilder(
       path=path,
@@ -207,12 +220,14 @@
   if init_pytree:
     gbt_builder.add_tree(init_pytree)

   for weak_learner in sklearn_model.estimators_.ravel():
-    gbt_builder.add_tree(convert_sklearn_tree_to_tfdf_pytree(
-        weak_learner,
-        weight=sklearn_model.learning_rate,
-    ))
+    gbt_builder.add_tree(
+        convert_sklearn_tree_to_tfdf_pytree(
+            weak_learner,
+            weight=sklearn_model.learning_rate,
+        )
+    )
   gbt_builder.close()
-  return tf.keras.models.load_model(path)
+  return tf_keras.models.load_model(path)


 def convert_sklearn_tree_to_tfdf_pytree(
@@ -233,7 +248,8 @@
     sklearn_tree_data = sklearn_tree.tree_.__getstate__()
   except AttributeError as e:
     raise ValueError(
-        "Scikit-Learn model must be fit to data before converting.") from e
+        "Scikit-Learn model must be fit to data before converting."
+    ) from e

   field_names = sklearn_tree_data["nodes"].dtype.names
   task_type = _get_sklearn_tree_task_type(sklearn_tree)
@@ -251,8 +267,9 @@
     }
     if task_type is TaskType.SCALAR_REGRESSION:
       scaling_factor = weight if weight else 1.0
-      node["value"] = tfdf.py_tree.value.RegressionValue(target_value[0][0] *
-                                                         scaling_factor)
+      node["value"] = tfdf.py_tree.value.RegressionValue(
+          target_value[0][0] * scaling_factor
+      )
     elif task_type is TaskType.SINGLE_LABEL_CLASSIFICATION:
       # Normalise to probabilities if we have a classification tree.
       probabilities = list(target_value[0] / target_value[0].sum())
@@ -260,7 +277,8 @@
     else:
       raise ValueError(
           "Only scalar regression and single-label classification are "
-          "supported.")
+          "supported."
+      )

   nodes.append(node)

   root_node = _convert_sklearn_node_to_tfdf_node(
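For reference, a short usage sketch of the converter touched above (the dataset is synthetic; pass `intermediate_write_path` if the converted model must be saved and reloaded later):

```python
from sklearn import datasets
from sklearn import tree
import tensorflow as tf
from tensorflow_decision_forests.contrib import scikit_learn_model_converter

# Fit any supported scikit-learn tree model...
features, labels = datasets.make_regression(
    n_samples=100, n_features=4, random_state=42)
sklearn_model = tree.DecisionTreeRegressor(random_state=42).fit(features, labels)

# ...and emulate it as a tf_keras.Model with the same call signature.
tf_model = scikit_learn_model_converter.convert(sklearn_model)
predictions = tf_model.predict(tf.constant(features, dtype=tf.float32))
```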
diff --git a/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter_test.py b/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter_test.py
index 890b8c20..b0e21f6c 100644
--- a/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter_test.py
+++ b/tensorflow_decision_forests/contrib/scikit_learn_model_converter/scikit_learn_model_converter_test.py
@@ -23,6 +23,7 @@
 from sklearn import linear_model
 from sklearn import tree
 import tensorflow as tf
+import tf_keras

 from tensorflow_decision_forests.contrib import scikit_learn_model_converter

@@ -34,12 +35,18 @@ class ScikitLearnModelConverterTest(tf.test.TestCase, parameterized.TestCase):
       (tree.ExtraTreeRegressor(random_state=42),),
       (ensemble.RandomForestRegressor(random_state=42),),
       (ensemble.ExtraTreesRegressor(random_state=42),),
-      (ensemble.GradientBoostingRegressor(random_state=42,),),
+      (
+          ensemble.GradientBoostingRegressor(
+              random_state=42,
+          ),
+      ),
       (ensemble.GradientBoostingRegressor(random_state=42, init="zero"),),
-      (ensemble.GradientBoostingRegressor(
-          random_state=42,
-          init=tree.DecisionTreeRegressor(random_state=42),
-      ),),
+      (
+          ensemble.GradientBoostingRegressor(
+              random_state=42,
+              init=tree.DecisionTreeRegressor(random_state=42),
+          ),
+      ),
   )
   def test_convert_reproduces_regression_model(
       self,
@@ -69,10 +76,12 @@ def test_convert_reproduces_regression_model(
     loaded_tf_tree = tf.saved_model.load(path)
     self.assertAllEqual(tf_tree(tf_features), loaded_tf_tree(tf_features))

-  @parameterized.parameters((tree.DecisionTreeClassifier(random_state=42),),
-                            (tree.ExtraTreeClassifier(random_state=42),),
-                            (ensemble.RandomForestClassifier(random_state=42),),
-                            (ensemble.ExtraTreesClassifier(random_state=42),))
+  @parameterized.parameters(
+      (tree.DecisionTreeClassifier(random_state=42),),
+      (tree.ExtraTreeClassifier(random_state=42),),
+      (ensemble.RandomForestClassifier(random_state=42),),
+      (ensemble.ExtraTreesClassifier(random_state=42),),
+  )
   def test_convert_reproduces_classification_model(
       self,
       sklearn_tree,
@@ -165,11 +174,12 @@ def test_convert_uses_intermediate_model_path_if_provided(self):
         intermediate_write_path=write_path,
     )
     # We should be able to load the intermediate TFDF model from the given path.
-    tfdf_tree = tf.keras.models.load_model(write_path)
-    self.assertIsInstance(tfdf_tree, tf.keras.Model)
+    tfdf_tree = tf_keras.models.load_model(write_path)
+    self.assertIsInstance(tfdf_tree, tf_keras.Model)

   def test_convert_sklearn_tree_to_tfdf_pytree_raises_if_weight_provided_for_classification_tree(
-      self):
+      self,
+  ):
     features, labels = datasets.make_classification(random_state=42)
     sklearn_tree = tree.DecisionTreeClassifier(random_state=42).fit(
         features,
@@ -185,7 +195,8 @@
     )

   def test_convert_raises_when_gbt_initial_estimator_is_not_tree_or_constant(
-      self):
+      self,
+  ):
     features, labels = datasets.make_regression(
         n_samples=100,
         n_features=10,

diff --git a/tensorflow_decision_forests/contrib/training_preprocessing/training_preprocessing_test.py b/tensorflow_decision_forests/contrib/training_preprocessing/training_preprocessing_test.py
index 51f0ca12..318d98bf 100644
--- a/tensorflow_decision_forests/contrib/training_preprocessing/training_preprocessing_test.py
+++ b/tensorflow_decision_forests/contrib/training_preprocessing/training_preprocessing_test.py
@@ -21,6 +21,7 @@
 import numpy as np
 import tensorflow as tf
 import tensorflow_decision_forests as tfdf
+import tf_keras

 from tensorflow_decision_forests.contrib.training_preprocessing import training_preprocessing

@@ -231,7 +232,7 @@ def make_dataset(num_examples):
     model.save(saved_model_path)

     logging.info('Loading model from %s', saved_model_path)
-    loaded_model = tf.keras.models.load_model(saved_model_path)
+    loaded_model = tf_keras.models.load_model(saved_model_path)
     loaded_model.summary()

     # Check exported / imported model predictions
+ "ignore_for_dep=third_party.tensorflow.python.framework.type_spec_registry", + ], deps = [ + ":keras_internal", # TensorFlow Python, + # tf_keras dep, # TensorFlow /distribute:input_lib, "//tensorflow_decision_forests/component/inspector", "//tensorflow_decision_forests/component/tuner", @@ -88,7 +93,9 @@ py_library( srcs_version = "PY3", deps = [ ":core_inference", + ":keras_internal", # TensorFlow Python, + # tf_keras dep, # TensorFlow /data/ops:dataset_ops, "//tensorflow_decision_forests/component/inspector", "//tensorflow_decision_forests/component/tuner", @@ -105,6 +112,21 @@ py_library( ], ) +py_library( + name = "keras_internal", + srcs = ["keras_internal.py"], + srcs_version = "PY3", + deps = [ + # tf_keras dep, + # tf_keras/engine dep, + # tf_keras/engine:data_adapter dep, + # tf_keras/engine:input_layer dep, + # tf_keras/feature_column:dense_features_v2 dep, + ## tf_keras/layers dep, + # tf_keras/utils:dataset_creator dep, + ], +) + # Tests # ===== @@ -123,6 +145,7 @@ py_test( deps = [ ":core", ":keras", + ":keras_internal", "@com_google_protobuf//:python_srcs", # absl/flags dep, # absl/logging dep, @@ -130,6 +153,7 @@ py_test( # numpy dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, "//tensorflow_decision_forests/component/inspector", "//tensorflow_decision_forests/component/model_plotter", "//tensorflow_decision_forests/tensorflow:core", @@ -159,6 +183,7 @@ py_test( # pandas dep, # TensorFlow Python, "//tensorflow_decision_forests", + # tf_keras dep, ], ) @@ -182,6 +207,7 @@ py_test( "manual", ], deps = [ + ":keras_internal", # absl/flags dep, # absl/logging dep, # absl/testing:parameterized dep, @@ -189,6 +215,7 @@ py_test( # pandas dep, # portpicker dep, # TensorFlow Python, + # tf_keras dep, # TensorFlow /distribute:distribute_lib, "//tensorflow_decision_forests", "@ydf//yggdrasil_decision_forests/learner/distributed_gradient_boosted_trees:dgbt_py_proto", @@ -208,6 +235,7 @@ py_binary( # numpy dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, "//tensorflow_decision_forests/tensorflow/ops/inference:op_py", ], ) @@ -229,3 +257,13 @@ tf_cc_binary( "@ydf//yggdrasil_decision_forests/utils/distribute/implementations/grpc:grpc_worker_lib_with_main", ], ) + +py_test( + name = "keras_internal_test", + srcs = ["keras_internal_test.py"], + python_version = "PY3", + deps = [ + ":keras_internal", + # TensorFlow Python, + ], +) diff --git a/tensorflow_decision_forests/keras/__init__.py b/tensorflow_decision_forests/keras/__init__.py index 7da378cc..b348b87a 100644 --- a/tensorflow_decision_forests/keras/__init__.py +++ b/tensorflow_decision_forests/keras/__init__.py @@ -44,7 +44,7 @@ # ... # Load a model: it loads as a generic keras model. 
-loaded_model = tf.keras.models.load_model("/path/to/my/model") +loaded_model = tf_keras.models.load_model("/path/to/my/model") ``` """ diff --git a/tensorflow_decision_forests/keras/core.py b/tensorflow_decision_forests/keras/core.py index f11739bb..c11f3246 100644 --- a/tensorflow_decision_forests/keras/core.py +++ b/tensorflow_decision_forests/keras/core.py @@ -54,6 +54,8 @@ from typing import Optional, List, Dict, Any, Tuple, NamedTuple, Set, Union, Literal import tensorflow as tf +import tf_keras + from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import load_op @@ -70,20 +72,8 @@ from yggdrasil_decision_forests.learner import abstract_learner_pb2 from yggdrasil_decision_forests.model import abstract_model_pb2 # pylint: disable=unused-import from yggdrasil_decision_forests.utils.distribute.implementations.grpc import grpc_pb2 # pylint: disable=unused-import +from tensorflow_decision_forests.keras import keras_internal -try: - # tf>1.12 - import keras.src.engine.data_adapter as data_adapter -except ImportError: - # tf<=1.12 - import keras.engine.data_adapter as data_adapter -get_data_handler = data_adapter.get_data_handler - -layers = tf.keras.layers -models = tf.keras.models -optimizers = tf.keras.optimizers -losses = tf.keras.losses -backend = tf.keras.backend no_automatic_dependency_tracking = ( tf1_compatibility.no_automatic_dependency_tracking @@ -385,7 +375,7 @@ class CoreModel(InferenceCoreModel): # ... # Load a model: it loads as a generic keras model. - model = tf.keras.models.load_model("/tmp/my_saved_model") + model = tf_keras.models.load_model("/tmp/my_saved_model") ``` The training logs (e.g. feature statistics, validation loss, remaining @@ -513,9 +503,9 @@ def __init__( learner_params: Optional[HyperParameters] = None, features: Optional[List[FeatureUsage]] = None, exclude_non_specified_features: Optional[bool] = False, - preprocessing: Optional["models.Functional"] = None, - postprocessing: Optional["models.Functional"] = None, - training_preprocessing: Optional["models.Functional"] = None, + preprocessing: Optional[keras_internal.Functional] = None, + postprocessing: Optional[keras_internal.Functional] = None, + training_preprocessing: Optional[keras_internal.Functional] = None, ranking_group: Optional[str] = None, uplift_treatment: Optional[str] = None, temp_directory: Optional[str] = None, @@ -652,7 +642,7 @@ def load_weights(self, *args, **kwargs): # pylint: disable=useless-super-delega `load_weights` is not supported by TensorFlow Decision Forests models. To save and restore a model, use the SavedModel API i.e. - `model.save(...)` and `tf.keras.models.load_model(...)`. To resume the + `model.save(...)` and `tf_keras.models.load_model(...)`. To resume the training of an existing model, create the model with `try_resume_training=True` (default value) and with a similar `temp_directory` argument. See documentation of `try_resume_training` @@ -1118,7 +1108,7 @@ def fit( steps_per_epoch: Optional[Any] = None, class_weight: Optional[Any] = None, **kwargs, - ) -> tf.keras.callbacks.History: + ) -> tf_keras.callbacks.History: """Trains the model. Local training @@ -1406,7 +1396,7 @@ def _fit_implementation( validation_steps, steps_per_epoch, class_weight, - ) -> tf.keras.callbacks.History: # pylint: disable=g-doc-args,g-doc-return-or-yield + ) -> tf_keras.callbacks.History: # pylint: disable=g-doc-args,g-doc-return-or-yield """Train the model. This method performs operations that resembles as the Keras' fit function. 
@@ -1426,15 +1416,15 @@ def _fit_implementation(
     """

     # Create the callback manager
-    if not isinstance(callbacks, tf.keras.callbacks.CallbackList):
-      callbacks = tf.keras.callbacks.CallbackList(
+    if not isinstance(callbacks, tf_keras.callbacks.CallbackList):
+      callbacks = tf_keras.callbacks.CallbackList(
           callbacks, model=self, add_history=False
       )

     # Manages the history manually.
     # Note: The both the History and CallbackList object will override the
     # "model.history" field.
-    history = tf.keras.callbacks.History()
+    history = tf_keras.callbacks.History()
     history.model = self
     history.on_train_begin()
     history.on_epoch_begin(0)
@@ -1453,7 +1443,7 @@ def _fit_implementation(
     # training).
     validation_data_handler = None
     if validation_data:
-      val_x, val_y, val_sample_weight = tf.keras.utils.unpack_x_y_sample_weight(
+      val_x, val_y, val_sample_weight = keras_internal.unpack_x_y_sample_weight(
           validation_data
       )

@@ -1464,7 +1454,7 @@ def _fit_implementation(
       # seems to cause some issues.

       # Create data_handler for evaluation and cache it.
-      validation_data_handler = get_data_handler(
+      validation_data_handler = keras_internal.get_data_handler(
           x=val_x,
           y=val_y,
           sample_weight=val_sample_weight,
@@ -1484,7 +1474,7 @@ def _fit_implementation(
     # Wraps the input training dataset into a tf.data.Dataset like object.
     # This is a noop if the training dataset is already provided as a
     # tf.data.Dataset.
-    data_handler = get_data_handler(
+    data_handler = keras_internal.get_data_handler(
         x=x,
         y=y,
         sample_weight=sample_weight,
@@ -1990,7 +1980,7 @@ def fit_on_dataset_path(

   def _add_training_logs_to_history(
       self,
-      history: tf.keras.callbacks.History,
+      history: tf_keras.callbacks.History,
       inspector: Optional[inspector_lib.AbstractInspector] = None,
   ) -> Optional[Dict[str, Any]]:
     if inspector is None:
@@ -2010,8 +2000,8 @@ def _add_training_logs_to_history(

   def _training_logs_to_history(
       self, inspector: Optional[inspector_lib.AbstractInspector] = None
-  ) -> tf.keras.callbacks.History:
-    history = tf.keras.callbacks.History()
+  ) -> tf_keras.callbacks.History:
+    history = tf_keras.callbacks.History()
     history.model = self
     history.on_train_begin()
     history.on_epoch_begin(0)
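Note: both helpers that core.py now takes from the `keras_internal` shim have simple contracts. `unpack_x_y_sample_weight` normalizes the `x` / `(x, y)` / `(x, y, sample_weight)` forms accepted by Keras; a rough behavioral sketch (the real implementation lives in tf_keras's `data_adapter` module):

```python
# Behavioral sketch only; not the tf_keras implementation.
def unpack_x_y_sample_weight(data):
  """Returns an (x, y, sample_weight) triple for any accepted data form."""
  if isinstance(data, tuple):
    if len(data) == 1:
      return data[0], None, None
    if len(data) == 2:
      return data[0], data[1], None
    if len(data) == 3:
      return data[0], data[1], data[2]
  return data, None, None
```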
diff --git a/tensorflow_decision_forests/keras/core_inference.py b/tensorflow_decision_forests/keras/core_inference.py
index 5f39a46a..7205d794 100644
--- a/tensorflow_decision_forests/keras/core_inference.py
+++ b/tensorflow_decision_forests/keras/core_inference.py
@@ -22,11 +22,13 @@
 from functools import partial  # pylint: disable=g-importing-member
 import os
 import tempfile
-from typing import Optional, List, Dict, Any, Union, NamedTuple
+from typing import Any, Dict, List, NamedTuple, Optional, Union
 import uuid
 import zipfile

 import tensorflow as tf
+import tf_keras
+from tensorflow_decision_forests.keras import keras_internal

 from tensorflow.python.distribute import input_lib
 from tensorflow_decision_forests.component.inspector import inspector as inspector_lib
@@ -39,11 +41,6 @@
 from yggdrasil_decision_forests.model import abstract_model_pb2  # pylint: disable=unused-import
 from yggdrasil_decision_forests.utils.distribute.implementations.grpc import grpc_pb2  # pylint: disable=unused-import

-layers = tf.keras.layers
-models = tf.keras.models
-optimizers = tf.keras.optimizers
-losses = tf.keras.losses
-backend = tf.keras.backend

 # The length of a model identifier
 MODEL_IDENTIFIER_LENGTH = 16
@@ -225,7 +222,7 @@ def prediction_key(self) -> str:
     return self.output if self.output else self.label


-class InferenceCoreModel(models.Model):
+class InferenceCoreModel(tf_keras.models.Model):
   """Keras Model V2 wrapper around an Yggdrasil Model.

   See "CoreModel" in "core.py" for the definition of the arguments.
@@ -238,8 +235,8 @@ def __init__(
       verbose: int = 1,
       advanced_arguments: Optional[AdvancedArguments] = None,
       name: Optional[str] = None,
-      preprocessing: Optional["models.Functional"] = None,
-      postprocessing: Optional["models.Functional"] = None,
+      preprocessing: Optional[keras_internal.Functional] = None,
+      postprocessing: Optional[keras_internal.Functional] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
       multitask: Optional[List[MultiTaskItem]] = None,
@@ -464,9 +461,9 @@ def make_predict_function(self):  # pytype: disable=signature-mismatch  # overri

     @tf.function(reduce_retracing=True)
     def predict_function_not_trained(iterator):
       """Prediction of a non-trained model. Returns "zeros"."""

       data = next(iterator)
-      x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(data)
+      x, _, _ = keras_internal.unpack_x_y_sample_weight(data)
       batch_size = _batch_size(x)
       return tf.zeros([batch_size, 1])
@@ -898,7 +894,7 @@ def _extract_sample(self, x):
       # Extract the example here (instead of inside of "predict") to make
       # sure this operation is done on the chief.
       for row in dataset.take(1):
-        x, _, _ = tf.keras.utils.unpack_x_y_sample_weight(row)
+        x, _, _ = keras_internal.unpack_x_y_sample_weight(row)
         return x
     except Exception:  # pylint: disable=broad-except
       pass
@@ -1282,7 +1278,7 @@ def yggdrasil_model_to_keras_model(
       input_model_signature_fn=input_model_signature_fn,
   )

-  tf.keras.models.save_model(model, dst_path)
+  tf_keras.models.save_model(model, dst_path)

   return
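Note: a usage sketch for the converter updated above; both paths are placeholders:

```python
# Sketch: convert a raw Yggdrasil Decision Forests model directory into a
# Keras SavedModel, then reload it with tf_keras. Paths are illustrative.
import tensorflow_decision_forests as tfdf
import tf_keras

tfdf.keras.yggdrasil_model_to_keras_model("/tmp/ydf_model", "/tmp/keras_model")
loaded_model = tf_keras.models.load_model("/tmp/keras_model")
```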
diff --git a/tensorflow_decision_forests/keras/keras_distributed_test.py b/tensorflow_decision_forests/keras/keras_distributed_test.py
index 3c170c55..e123aaa1 100644
--- a/tensorflow_decision_forests/keras/keras_distributed_test.py
+++ b/tensorflow_decision_forests/keras/keras_distributed_test.py
@@ -32,6 +32,7 @@
 from tensorflow.python.distribute import distribute_lib

 import tensorflow_decision_forests as tfdf
+from tensorflow_decision_forests.keras import keras_internal
 from yggdrasil_decision_forests.learner.distributed_gradient_boosted_trees import distributed_gradient_boosted_trees_pb2

@@ -183,7 +184,7 @@ def dataset_fn(
     with strategy.scope():
       model = tfdf.keras.DistributedGradientBoostedTreesModel(worker_logs=False)
       model.compile(metrics=["accuracy"])
-    # Note: "tf.keras.utils.experimental.DatasetCreator" seems to also work.
+    # Note: "tf_keras.utils.experimental.DatasetCreator" seems to also work.
     train_dataset_creator = strategy.distribute_datasets_from_function(
         lambda context: dataset_fn(context, seed=111)
     )
@@ -249,7 +250,7 @@ def dataset_fn(input_context):
       dataset = dataset.prefetch(2)
       return dataset

-    dataset_creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn)
+    dc = keras_internal.DatasetCreator(dataset_fn)

     cluster_resolver = _create_in_process_tf_ps_cluster(num_workers=2, num_ps=1)

@@ -262,9 +263,7 @@ def dataset_fn(input_context):
     with self.assertRaisesRegex(
         ValueError, "does not support training with a TF Distribution strategy"
     ):
-      model.fit(
-          dataset_creator, steps_per_epoch=num_examples // global_batch_size
-      )
+      model.fit(dc, steps_per_epoch=num_examples // global_batch_size)

   def _shard_dataset(self, path, num_shards=20) -> List[str]:
     """Splits a csv dataset into multiple csv files."""
diff --git a/tensorflow_decision_forests/keras/keras_internal.py b/tensorflow_decision_forests/keras/keras_internal.py
new file mode 100644
index 00000000..e28127f9
--- /dev/null
+++ b/tensorflow_decision_forests/keras/keras_internal.py
@@ -0,0 +1,24 @@
+# Copyright 2021 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Access to Keras functions with different internal and external paths."""
+
+from tf_keras.src.engine import data_adapter as _data_adapter
+from tf_keras.src.engine.functional import Functional
+from tf_keras.src.feature_column.dense_features_v2 import DenseFeatures
+from tf_keras.src.utils.dataset_creator import DatasetCreator
+
+
+unpack_x_y_sample_weight = _data_adapter.unpack_x_y_sample_weight
+get_data_handler = _data_adapter.get_data_handler
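Note: with the shim in place, call sites depend on `keras_internal` rather than on private Keras module paths. An illustrative call site, mirroring the test updates below (`feature_columns` is assumed to be a list of TF feature columns):

```python
# Illustrative; `feature_columns` is an assumed input.
import tensorflow_decision_forests as tfdf
from tensorflow_decision_forests.keras import keras_internal

preprocessing = keras_internal.DenseFeatures(feature_columns)
model = tfdf.keras.RandomForestModel(preprocessing=preprocessing)
```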
+ +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow_decision_forests/keras/keras_test.py b/tensorflow_decision_forests/keras/keras_test.py index 721438f4..97728417 100644 --- a/tensorflow_decision_forests/keras/keras_test.py +++ b/tensorflow_decision_forests/keras/keras_test.py @@ -31,6 +31,7 @@ import numpy as np import pandas as pd import tensorflow as tf +import tf_keras from google.protobuf import text_format @@ -38,6 +39,7 @@ from tensorflow_decision_forests.component.inspector import inspector as inspector_lib from tensorflow_decision_forests.component.model_plotter import model_plotter from tensorflow_decision_forests.keras import core +from tensorflow_decision_forests.keras import keras_internal from tensorflow_decision_forests.tensorflow import core as tf_core from yggdrasil_decision_forests.dataset import synthetic_dataset_pb2 from yggdrasil_decision_forests.learner import abstract_learner_pb2 @@ -45,13 +47,10 @@ from yggdrasil_decision_forests.learner.random_forest import random_forest_pb2 from yggdrasil_decision_forests.model import abstract_model_pb2 -layers = tf.keras.layers -models = tf.keras.models -optimizers = tf.keras.optimizers -callbacks = tf.keras.callbacks -Normalization = layers.experimental.preprocessing.Normalization -CategoryEncoding = layers.experimental.preprocessing.CategoryEncoding -StringLookup = layers.experimental.preprocessing.StringLookup + +Normalization = tf_keras.layers.Normalization +CategoryEncoding = tf_keras.layers.CategoryEncoding +StringLookup = tf_keras.layers.StringLookup Dataset = collections.namedtuple( "Dataset", ["train", "test", "semantics", "label", "num_classes"] @@ -264,7 +263,7 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]: normalizer = Normalization(axis=None) normalizer.adapt(raw_input_values) - raw_input = layers.Input(shape=(1,), name=key) + raw_input = tf_keras.layers.Input(shape=(1,), name=key) processed_input = normalizer(raw_input) raw_inputs.append(raw_input) @@ -273,8 +272,8 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]: elif semantic == keras.FeatureSemantic.CATEGORICAL: if raw_input_values.dtype in [np.int64]: # Integer - raw_input = layers.Input(shape=(1,), name=key, dtype="int64") - raw_input = layers.minimum([raw_input, 5]) + raw_input = tf_keras.layers.Input(shape=(1,), name=key, dtype="int64") + raw_input = tf_keras.layers.minimum([raw_input, 5]) onehot = CategoryEncoding( num_tokens=np.minimum(raw_input_values, 5), output_mode="binary" ) @@ -282,7 +281,7 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]: else: # String - raw_input = layers.Input(shape=(1,), name=key, dtype="string") + raw_input = tf_keras.layers.Input(shape=(1,), name=key, dtype="string") lookup = StringLookup(max_tokens=5, output_mode="binary") lookup.adapt(raw_input_values) @@ -363,7 +362,9 @@ class Signature(enum.Enum): ANY_FEATURE_COLUMN = 9 -def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model: +def build_model( + signature: Signature, dataset: Dataset, **args +) -> tf_keras.models.Model: """Builds a model with the different supported signatures. Setting nn_baseline=True creates a NN keras model instead. 
diff --git a/tensorflow_decision_forests/keras/keras_test.py b/tensorflow_decision_forests/keras/keras_test.py
index 721438f4..97728417 100644
--- a/tensorflow_decision_forests/keras/keras_test.py
+++ b/tensorflow_decision_forests/keras/keras_test.py
@@ -31,6 +31,7 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+import tf_keras

 from google.protobuf import text_format

@@ -38,6 +39,7 @@
 from tensorflow_decision_forests.component.inspector import inspector as inspector_lib
 from tensorflow_decision_forests.component.model_plotter import model_plotter
 from tensorflow_decision_forests.keras import core
+from tensorflow_decision_forests.keras import keras_internal
 from tensorflow_decision_forests.tensorflow import core as tf_core
 from yggdrasil_decision_forests.dataset import synthetic_dataset_pb2
 from yggdrasil_decision_forests.learner import abstract_learner_pb2
@@ -45,13 +47,10 @@
 from yggdrasil_decision_forests.learner.random_forest import random_forest_pb2
 from yggdrasil_decision_forests.model import abstract_model_pb2

-layers = tf.keras.layers
-models = tf.keras.models
-optimizers = tf.keras.optimizers
-callbacks = tf.keras.callbacks
-Normalization = layers.experimental.preprocessing.Normalization
-CategoryEncoding = layers.experimental.preprocessing.CategoryEncoding
-StringLookup = layers.experimental.preprocessing.StringLookup
+
+Normalization = tf_keras.layers.Normalization
+CategoryEncoding = tf_keras.layers.CategoryEncoding
+StringLookup = tf_keras.layers.StringLookup

 Dataset = collections.namedtuple(
     "Dataset", ["train", "test", "semantics", "label", "num_classes"]
@@ -264,7 +263,7 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]:
       normalizer = Normalization(axis=None)
       normalizer.adapt(raw_input_values)

-      raw_input = layers.Input(shape=(1,), name=key)
+      raw_input = tf_keras.layers.Input(shape=(1,), name=key)
       processed_input = normalizer(raw_input)

       raw_inputs.append(raw_input)
@@ -273,8 +272,8 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]:
     elif semantic == keras.FeatureSemantic.CATEGORICAL:
       if raw_input_values.dtype in [np.int64]:
         # Integer
-        raw_input = layers.Input(shape=(1,), name=key, dtype="int64")
-        raw_input = layers.minimum([raw_input, 5])
+        raw_input = tf_keras.layers.Input(shape=(1,), name=key, dtype="int64")
+        raw_input = tf_keras.layers.minimum([raw_input, 5])
         onehot = CategoryEncoding(
             num_tokens=np.minimum(raw_input_values, 5), output_mode="binary"
         )
@@ -282,7 +281,7 @@ def build_preprocessing(dataset: Dataset) -> Tuple[List[Any], List[Any]]:

       else:
         # String
-        raw_input = layers.Input(shape=(1,), name=key, dtype="string")
+        raw_input = tf_keras.layers.Input(shape=(1,), name=key, dtype="string")
         lookup = StringLookup(max_tokens=5, output_mode="binary")
         lookup.adapt(raw_input_values)

@@ -363,7 +362,9 @@ class Signature(enum.Enum):
   ANY_FEATURE_COLUMN = 9


-def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model:
+def build_model(
+    signature: Signature, dataset: Dataset, **args
+) -> tf_keras.models.Model:
   """Builds a model with the different supported signatures.

   Setting nn_baseline=True creates a NN keras model instead. This is useful to
@@ -391,25 +392,33 @@ def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model:
   elif signature == Signature.DENSE_PREPROCESSING:
     raw_inputs, processed_inputs = build_preprocessing(dataset)
-    processed_inputs = layers.Concatenate()(processed_inputs)
-    preprocessing = models.Model(inputs=raw_inputs, outputs=processed_inputs)
+    processed_inputs = tf_keras.layers.Concatenate()(processed_inputs)
+    preprocessing = tf_keras.models.Model(
+        inputs=raw_inputs, outputs=processed_inputs
+    )
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   elif signature == Signature.STRUCTURED_DICTIONARY_PREPROCESSING:
     raw_inputs, processed_inputs = build_preprocessing(dataset)
     processed_inputs = {value.name: value for value in processed_inputs}
-    preprocessing = models.Model(inputs=raw_inputs, outputs=processed_inputs)
+    preprocessing = tf_keras.models.Model(
+        inputs=raw_inputs, outputs=processed_inputs
+    )
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   elif signature == Signature.STRUCTURED_LIST_PREPROCESSING:
     raw_inputs, processed_inputs = build_preprocessing(dataset)
-    preprocessing = models.Model(inputs=raw_inputs, outputs=processed_inputs)
+    preprocessing = tf_keras.models.Model(
+        inputs=raw_inputs, outputs=processed_inputs
+    )
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   elif signature == Signature.STRUCTURED_PREPROCESSING_WITH_SEMANTIC:
     raw_inputs, processed_inputs = build_preprocessing(dataset)
     processed_inputs = {value.name: value for value in processed_inputs}
-    preprocessing = models.Model(inputs=raw_inputs, outputs=processed_inputs)
+    preprocessing = tf_keras.models.Model(
+        inputs=raw_inputs, outputs=processed_inputs
+    )
     features = []
     for key in processed_inputs.keys():
       features.append(keras.FeatureUsage(key))
@@ -419,12 +428,12 @@ def build_model(signature: Signature, dataset: Dataset, **args) -> models.Model:
   elif signature == Signature.DENSE_FEATURE_COLUMN:
     feature_columns = build_feature_columns(dataset, dense=True)
-    preprocessing = layers.DenseFeatures(feature_columns)
+    preprocessing = keras_internal.DenseFeatures(feature_columns)
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   elif signature == Signature.ANY_FEATURE_COLUMN:
     feature_columns = build_feature_columns(dataset, dense=False)
-    preprocessing = layers.DenseFeatures(feature_columns)
+    preprocessing = keras_internal.DenseFeatures(feature_columns)
     model = keras.RandomForestModel(preprocessing=preprocessing, **args)

   else:
@@ -489,7 +498,7 @@ def _check_adult_model(
     logging.info("Predictions: %s", predictions)

     if check_serialization:
-      tf.keras.backend.clear_session()
+      tf_keras.backend.clear_session()

       # Export the trained model.
       saved_model_path = os.path.join(self.get_temp_dir(), "saved_model")
@@ -499,7 +508,7 @@ def _check_adult_model(
       logging.info("Saving model to %s", saved_model_path)
       model.save(saved_model_path)

-      tf.keras.backend.clear_session()
+      tf_keras.backend.clear_session()

       logging.info("Run model in separate binary")
       process = subprocess.Popen(
@@ -529,7 +538,7 @@ def _check_adult_model(

       # Load and evaluate the exported trained model.
       logging.info("Loading model from %s", new_saved_model_path)
-      loaded_model = models.load_model(new_saved_model_path)
+      loaded_model = tf_keras.models.load_model(new_saved_model_path)
       loaded_model.summary()

       evaluation = loaded_model.evaluate(tf_test)
@@ -755,7 +764,7 @@ def test_save_model_without_evaluation(self):

     # Load and evaluate the exported trained model.
logging.info("Loading model from %s", saved_model_path) - loaded_model = models.load_model(saved_model_path) + loaded_model = tf_keras.models.load_model(saved_model_path) loaded_model.summary() loaded_model.compile(metrics=["accuracy"]) @@ -816,7 +825,7 @@ def test_model_adult_dense_feature_columns(self): def test_model_adult_dense_nparray(self): dataset = adult_dataset() feature_columns = build_feature_columns(dataset, dense=True) - dense_features = layers.DenseFeatures(feature_columns) + dense_features = keras_internal.DenseFeatures(feature_columns) train_x = dense_features(dict(dataset.train)).numpy() train_y = dataset.train[dataset.label].values @@ -848,7 +857,7 @@ def test_model_adult_dense_nparray(self): def test_model_adult_dense_tfdataset(self): dataset = adult_dataset() feature_columns = build_feature_columns(dataset, dense=True) - dense_features = layers.DenseFeatures(feature_columns) + dense_features = keras_internal.DenseFeatures(feature_columns) train_x = dense_features(dict(dataset.train)) train_y = dataset.train[dataset.label].values @@ -1168,7 +1177,7 @@ def preprocess(feature_values, label): else: raise ValueError("Non initialized model") - class _TestEvalCallback(tf.keras.callbacks.Callback): + class _TestEvalCallback(tf_keras.callbacks.Callback): def on_train_end(self, logs=None): self.evaluation = model.evaluate(test_dataset) @@ -1305,15 +1314,15 @@ def test_model_adult_df_on_top_of_nn(self): # Note: The following code does not work with the "models.Sequential" API # (Nov.17, 2020). raw_inputs, preprocessed_inputs = build_preprocessing(dataset) - z1 = layers.Concatenate()(preprocessed_inputs) - z2 = layers.Dense(16, activation=tf.nn.relu6)(z1) - z3 = layers.Dense(16, activation=tf.nn.relu, name="last")(z2) - y = layers.Dense(1)(z3) - nn_model = models.Model(raw_inputs, y) + z1 = tf_keras.layers.Concatenate()(preprocessed_inputs) + z2 = tf_keras.layers.Dense(16, activation=tf.nn.relu6)(z1) + z3 = tf_keras.layers.Dense(16, activation=tf.nn.relu, name="last")(z2) + y = tf_keras.layers.Dense(1)(z3) + nn_model = tf_keras.models.Model(raw_inputs, y) nn_model.compile( - optimizer=optimizers.Adam(), - loss=tf.keras.losses.BinaryCrossentropy(), + optimizer=tf_keras.optimizers.Adam(), + loss=tf_keras.losses.BinaryCrossentropy(), metrics=["accuracy"], ) @@ -1322,7 +1331,7 @@ def test_model_adult_df_on_top_of_nn(self): nn_model.summary() # Build a DF on top of the NN - nn_without_head = models.Model( + nn_without_head = tf_keras.models.Model( inputs=nn_model.inputs, outputs=nn_model.get_layer("last").output ) df_model = keras.RandomForestModel(preprocessing=nn_without_head) @@ -1692,7 +1701,7 @@ def test_override_save(self): model_2.fit(keras.pd_dataframe_to_tf_dataset(dataset_2, label="label")) model_2.save(model_path) - model_2_restored = tf.keras.models.load_model(model_path) + model_2_restored = tf_keras.models.load_model(model_path) model_2_restored.predict( keras.pd_dataframe_to_tf_dataset(dataset_2, label="label") ) @@ -1708,7 +1717,7 @@ def test_output_logits(self): self.assertAlmostEqual(np.mean(predictions), -2.2, delta=0.2) self.assertAlmostEqual(np.std(predictions), 2.8, delta=0.25) - model.compile(metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0)]) + model.compile(metrics=[tf_keras.metrics.BinaryAccuracy(threshold=0.0)]) evaluation = model.evaluate(tf_test, return_dict=True) logging.info("Evaluation: %s", evaluation) @@ -1734,9 +1743,9 @@ def make_dataset(): model = keras.GradientBoostedTreesModel() - inputs = tf.keras.layers.Input(shape=(num_features,)) + inputs 
     outputs = model(inputs)
-    functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)
+    functional_model = tf_keras.Model(inputs=inputs, outputs=outputs)

     # Generate predictions before training.
     for features, _ in test_dataset.take(1):
@@ -2277,7 +2286,7 @@ def test_properties(self):

   def test_golden_model_gbt(self):
     dataset = adult_dataset()
-    loaded_model = models.load_model(
+    loaded_model = tf_keras.models.load_model(
         os.path.join(tfdf_test_data_path(), "model/saved_model_adult_rf")
     )
     prediction = loaded_model.predict(
@@ -2323,7 +2332,7 @@ def custom_model_input_signature(
         tfdf_model_path,
         input_model_signature_fn=custom_model_input_signature,
     )
-    loaded_model = models.load_model(tfdf_model_path)
+    loaded_model = tf_keras.models.load_model(tfdf_model_path)
     dataset = adult_dataset()
     prediction = loaded_model.predict(
         keras.pd_dataframe_to_tf_dataset(dataset.test, label="income")
@@ -2346,7 +2355,7 @@ def test_ydf_to_keras_model_uplift(self):
     test_df = test_df.drop(treatment_group, axis=1)

     core.yggdrasil_model_to_keras_model(ygg_model_path, tfdf_model_path)
-    loaded_model = models.load_model(tfdf_model_path)
+    loaded_model = tf_keras.models.load_model(tfdf_model_path)
     prediction = loaded_model.predict(
         keras.pd_dataframe_to_tf_dataset(test_df, label=outcome_key)
     )
@@ -2385,7 +2394,7 @@ def test_ydf_to_keras_model_with_source_container(
     core.yggdrasil_model_to_keras_model(src_model_path, dst_model_path)

     # Load/Check the model
-    _ = models.load_model(dst_model_path)
+    _ = tf_keras.models.load_model(dst_model_path)

   def test_load_combined_model(self):
     target = tf.random.uniform(shape=[100, 1], minval=25, maxval=50)
@@ -2393,7 +2402,7 @@ def test_load_combined_model(self):
       "my_feature": tf.random.uniform(shape=[100, 2], minval=1, maxval=100)
     }
     dataset = tf.data.Dataset.from_tensor_slices((features, target)).batch(32)
-    inputs = {"my_feature": tf.keras.Input(shape=(2,))}
+    inputs = {"my_feature": tf_keras.Input(shape=(2,))}

     model_1 = keras.RandomForestModel(num_trees=10, task=keras.Task.REGRESSION)
     model_2 = keras.RandomForestModel(num_trees=20, task=keras.Task.REGRESSION)
@@ -2403,7 +2412,7 @@ def model_2_preprocessing(x):

     model_2_pred = model_2(model_2_preprocessing(inputs))

-    combined_model = models.Model(inputs, model_2_pred)
+    combined_model = tf_keras.models.Model(inputs, model_2_pred)

     # Train first model.
     model_1.fit(dataset)
@@ -2417,7 +2426,7 @@ def mix(x, y):
     combined_model_path = os.path.join(tmp_path(), "combined_model")
     combined_model.save(combined_model_path, overwrite=True)
     combined_model_prediction = combined_model.predict([[1, 1]])
-    loaded_combined_model = models.load_model(combined_model_path)
+    loaded_combined_model = tf_keras.models.load_model(combined_model_path)

     # Check if inference is working on the combined model.
     loaded_combined_model_prediction = loaded_combined_model.predict([[1, 1]])
@@ -2435,7 +2444,7 @@ def mix(x, y):
         loaded_model_1_path,
         file_prefix=model_1.training_model_id,
     )
-    loaded_model_1 = models.load_model(loaded_model_1_path)
+    loaded_model_1 = tf_keras.models.load_model(loaded_model_1_path)
     logging.info(
         "Prediction result 1 is %s", loaded_model_1.predict(examples_1)
     )
@@ -2449,7 +2458,7 @@ def mix(x, y):
         loaded_model_2_path,
         file_prefix=model_2.training_model_id,
     )
-    loaded_model_2 = models.load_model(loaded_model_2_path)
+    loaded_model_2 = tf_keras.models.load_model(loaded_model_2_path)
     logging.info(
         "Prediction result 2 is %s", loaded_model_2.predict(examples_2)
     )
@@ -2642,7 +2651,7 @@ def make_dataset(num_examples):
     model.save(saved_model_path)

     logging.info("Loading model from %s", saved_model_path)
-    loaded_model = models.load_model(saved_model_path)
+    loaded_model = tf_keras.models.load_model(saved_model_path)
     loaded_model.summary()

     # Check exported / imported model predictions
@@ -2728,7 +2737,7 @@ def make_dataset(num_examples):
     model.save(saved_model_path)

     logging.info("Loading model from %s", saved_model_path)
-    loaded_model = models.load_model(saved_model_path)
+    loaded_model = tf_keras.models.load_model(saved_model_path)
     loaded_model.summary()

     # Check exported / imported model predictions
@@ -2901,7 +2910,7 @@ def test_plot_ydf_model(self):
     )
     model_tmp_path_keras = os.path.join(self.get_temp_dir(), "kerasmodel")
     keras.yggdrasil_model_to_keras_model(ygg_model_path, model_tmp_path_keras)
-    model = tf.keras.models.load_model(model_tmp_path_keras)
+    model = tf_keras.models.load_model(model_tmp_path_keras)
     tree_plot = model_plotter.plot_model(model, tree_idx=0, max_depth=2)
     expected_tree_start = (
         'display_tree({"margin": 10, "node_x_size": 160, "node_y_size": 28,'
diff --git a/tensorflow_decision_forests/keras/test_runner.py b/tensorflow_decision_forests/keras/test_runner.py
index b2abbfd4..07bbe1e3 100644
--- a/tensorflow_decision_forests/keras/test_runner.py
+++ b/tensorflow_decision_forests/keras/test_runner.py
@@ -30,6 +30,7 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
+import tf_keras

 # Inject the inference ops only.
 from tensorflow_decision_forests.tensorflow.ops.inference import op  # pylint: disable=unused-import
@@ -45,7 +46,7 @@ def main(argv):
   del argv

   logging.info("Load model")
-  model = tf.keras.models.load_model(FLAGS.model_path)
+  model = tf_keras.models.load_model(FLAGS.model_path)

   logging.info("Load dataset")
   pd_dataset = load_dataset()
diff --git a/tensorflow_decision_forests/keras/wrapper/wrapper.cc b/tensorflow_decision_forests/keras/wrapper/wrapper.cc
index 76529ba3..ffec1e5b 100644
--- a/tensorflow_decision_forests/keras/wrapper/wrapper.cc
+++ b/tensorflow_decision_forests/keras/wrapper/wrapper.cc
@@ -254,6 +254,7 @@ documentation (and meta-data) used to generate this file.
 from typing import Optional, List, Set

 import tensorflow as tf
+import tf_keras
 $0
 TaskType = "abstract_model_pb2.Task"  # pylint: disable=invalid-name
 AdvancedArguments = core.AdvancedArguments
@@ -550,9 +551,9 @@ class $0(core.CoreModel):
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
diff --git a/tensorflow_decision_forests/keras/wrappers_pre_generated.py b/tensorflow_decision_forests/keras/wrappers_pre_generated.py
index 20527189..35752bd9 100644
--- a/tensorflow_decision_forests/keras/wrappers_pre_generated.py
+++ b/tensorflow_decision_forests/keras/wrappers_pre_generated.py
@@ -28,6 +28,7 @@
 from typing import Optional, List, Set

 import tensorflow as tf
+import tf_keras

 from tensorflow_decision_forests.keras import core
 from tensorflow_decision_forests.component.tuner import tuner as tuner_lib
@@ -341,9 +342,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -479,6 +480,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=False
     )

+
 class DistributedGradientBoostedTreesModel(core.CoreModel):
   r"""Distributed Gradient Boosted Trees learning algorithm.

@@ -682,9 +684,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -782,6 +784,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=True
     )

+
 class GradientBoostedTreesModel(core.CoreModel):
   r"""Gradient Boosted Trees learning algorithm.
@@ -1199,9 +1202,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -1416,6 +1419,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=False
     )

+
 class HyperparameterOptimizerModel(core.CoreModel):
   r"""Hyperparameter Optimizer learning algorithm.

@@ -1572,9 +1576,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -1648,6 +1652,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=False
     )

+
 class MultitaskerModel(core.CoreModel):
   r"""Multitasker learning algorithm.

@@ -1804,9 +1809,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
@@ -1880,6 +1885,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
         support_partial_cache_dataset_format=False
     )

+
 class RandomForestModel(core.CoreModel):
   r"""Random Forest learning algorithm.

@@ -2231,9 +2237,9 @@ def __init__(
       task: Optional[TaskType] = core.Task.CLASSIFICATION,
       features: Optional[List[core.FeatureUsage]] = None,
       exclude_non_specified_features: Optional[bool] = False,
-      preprocessing: Optional["tf.keras.models.Functional"] = None,
-      postprocessing: Optional["tf.keras.models.Functional"] = None,
-      training_preprocessing: Optional["tf.keras.models.Functional"] = None,
+      preprocessing: Optional["tf_keras.models.Functional"] = None,
+      postprocessing: Optional["tf_keras.models.Functional"] = None,
+      training_preprocessing: Optional["tf_keras.models.Functional"] = None,
       ranking_group: Optional[str] = None,
       uplift_treatment: Optional[str] = None,
       temp_directory: Optional[str] = None,
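Note: unlike `core.py`, the generated wrappers keep the `Functional` annotation as a string literal. A string annotation is a lazy forward reference: it is stored unresolved, so importing these wrappers places no requirement on what `tf_keras.models` actually exports. A self-contained demonstration of that Python behavior (not TF-DF code):

```python
# Standard typing behavior: the quoted annotation is never resolved here,
# and tf_keras is never imported.
from typing import Optional


def f(preprocessing: Optional["tf_keras.models.Functional"] = None):
  return preprocessing


print(f.__annotations__["preprocessing"])
# typing.Optional[ForwardRef('tf_keras.models.Functional')]
```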
@@ -2231,9 +2237,9 @@ def __init__( task: Optional[TaskType] = core.Task.CLASSIFICATION, features: Optional[List[core.FeatureUsage]] = None, exclude_non_specified_features: Optional[bool] = False, - preprocessing: Optional["tf.keras.models.Functional"] = None, - postprocessing: Optional["tf.keras.models.Functional"] = None, - training_preprocessing: Optional["tf.keras.models.Functional"] = None, + preprocessing: Optional["tf_keras.models.Functional"] = None, + postprocessing: Optional["tf_keras.models.Functional"] = None, + training_preprocessing: Optional["tf_keras.models.Functional"] = None, ranking_group: Optional[str] = None, uplift_treatment: Optional[str] = None, temp_directory: Optional[str] = None, diff --git a/tensorflow_decision_forests/tensorflow/BUILD b/tensorflow_decision_forests/tensorflow/BUILD index d62ac107..365c7c50 100644 --- a/tensorflow_decision_forests/tensorflow/BUILD +++ b/tensorflow_decision_forests/tensorflow/BUILD @@ -77,6 +77,7 @@ py_library( # numpy dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, "//tensorflow_decision_forests/component/inspector", "@ydf//yggdrasil_decision_forests/dataset:data_spec_py_proto", "@ydf//yggdrasil_decision_forests/model:abstract_model_py_proto", @@ -92,6 +93,7 @@ py_library( # numpy dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, # TensorFlow /distribute:distribute_lib, # TensorFlow /distribute:parameter_server_strategy_v2, # TensorFlow /distribute/coordinator:cluster_coordinator, @@ -143,6 +145,7 @@ py_library( srcs_version = "PY3", deps = [ # TensorFlow Python, + # tf_keras dep, # TensorFlow /trackable, ], ) @@ -160,6 +163,7 @@ py_test( # absl/logging dep, # pandas dep, # TensorFlow Python, + # tf_keras dep, "@ydf//yggdrasil_decision_forests/dataset:data_spec_py_proto", "@ydf//yggdrasil_decision_forests/model:hyperparameter_py_proto", ], diff --git a/tools/test_bazel.sh b/tools/test_bazel.sh index f26afbeb..41857a5c 100755 --- a/tools/test_bazel.sh +++ b/tools/test_bazel.sh @@ -51,7 +51,7 @@ function is_macos() { # Install Pip dependencies ${PYTHON} -m ensurepip --upgrade || true ${PYTHON} -m pip install pip setuptools --upgrade -${PYTHON} -m pip install numpy pandas scikit-learn +${PYTHON} -m pip install numpy pandas scikit-learn tf_keras # Install Tensorflow at the chosen version. if [ ${TF_VERSION} == "nightly" ]; then