From 8022aa1fc106f572e590f69e8a94eb6733d7964b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 25 Jul 2024 14:05:27 +0200 Subject: [PATCH] Refactor code --- docs/examples/example_onnx.ipynb | 44 +++++++++++------------------- metalearners/drlearner.py | 12 ++++++--- metalearners/metalearner.py | 22 ++++++++++++--- metalearners/rlearner.py | 14 ++++++---- metalearners/slearner.py | 2 +- metalearners/tlearner.py | 14 ++++++---- metalearners/xlearner.py | 20 ++++++++++---- tests/test_drlearner.py | 36 ++++++++++++++----------- tests/test_rlearner.py | 36 ++++++++++++++----------- tests/test_tlearner.py | 36 ++++++++++++++----------- tests/test_xlearner.py | 46 +++++++++++++++----------------- 11 files changed, 157 insertions(+), 125 deletions(-) diff --git a/docs/examples/example_onnx.ipynb b/docs/examples/example_onnx.ipynb index 9cfe991..2575232 100644 --- a/docs/examples/example_onnx.ipynb +++ b/docs/examples/example_onnx.ipynb @@ -146,7 +146,7 @@ "### Converting the base models to ONNX\n", "\n", "Before being able to convert the MetaLearner to ONXX we need to manually convert the necessary\n", - "base models for the prediction. To get a list of the necessary base models that need to be\n", + "base models for the prediction. To get the necessary base models that need to be\n", "converted we can use :meth:`~metalearners.MetaLearner._necessary_onnx_models`." 
] }, @@ -156,7 +156,8 @@ "metadata": {}, "outputs": [], "source": [ - "xlearner._necessary_onnx_models()" + "necessary_models = xlearner._necessary_onnx_models()\n", + "necessary_models" ] }, { @@ -185,33 +186,18 @@ "from onnxmltools import convert_lightgbm\n", "from onnxconverter_common.data_types import FloatTensorType\n", "\n", - "onnx_models: dict[str, list[onnx.ModelProto]] = {\n", - " \"control_effect_model\": [],\n", - " \"treatment_effect_model\": [],\n", - " \"propensity_model\": [],\n", - "}\n", - "\n", - "for model in xlearner._nuisance_models[\"propensity_model\"]:\n", - " onnx_model = convert_lightgbm(\n", - " model._overall_estimator,\n", - " initial_types=[(\"X\", FloatTensorType([None, len(feature_columns)]))],\n", - " zipmap=False,\n", - " )\n", - " onnx_models[\"propensity_model\"].append(onnx_model)\n", - "\n", - "for model in xlearner._treatment_models[\"control_effect_model\"]:\n", - " onnx_model = convert_lightgbm(\n", - " model._overall_estimator,\n", - " initial_types=[(\"X\", FloatTensorType([None, len(feature_columns)]))],\n", - " )\n", - " onnx_models[\"control_effect_model\"].append(onnx_model)\n", - "\n", - "for model in xlearner._treatment_models[\"treatment_effect_model\"]:\n", - " onnx_model = convert_lightgbm(\n", - " model._overall_estimator,\n", - " initial_types=[(\"X\", FloatTensorType([None, len(feature_columns)]))],\n", - " )\n", - " onnx_models[\"treatment_effect_model\"].append(onnx_model)" + "onnx_models: dict[str, list[onnx.ModelProto]] = {}\n", + "\n", + "for model_kind, models in necessary_models.items():\n", + " onnx_models[model_kind] = []\n", + " for model in models:\n", + " onnx_models[model_kind].append(\n", + " convert_lightgbm(\n", + " model,\n", + " initial_types=[(\"X\", FloatTensorType([None, len(feature_columns)]))],\n", + " zipmap=False,\n", + " )\n", + " )" ] }, { diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 229b4ae..38dbab2 100644 --- a/metalearners/drlearner.py +++ 
b/metalearners/drlearner.py @@ -44,6 +44,7 @@ _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, + get_overall_estimators, ) _EPSILON = 1e-09 @@ -405,9 +406,12 @@ def _pseudo_outcome( return pseudo_outcome - @classmethod - def _necessary_onnx_models(cls) -> set[str]: - return {TREATMENT_MODEL} + def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]: + return { + TREATMENT_MODEL: get_overall_estimators( + self._treatment_models[TREATMENT_MODEL] + ) + } @copydoc(MetaLearner._build_onnx, sep="") def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): @@ -422,7 +426,7 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): from spox import Var, build, inline self._validate_feature_set_none() - self._validate_onnx_models(models, self._necessary_onnx_models()) + self._validate_onnx_models(models, set(self._necessary_onnx_models().keys())) input_dict = infer_input_dict(models[TREATMENT_MODEL][0]) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 249f33c..2b702d9 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -138,6 +138,17 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: raise ValueError("Need at least two folds to use synchronization.") +def get_overall_estimators(cfes: list[CrossFitEstimator]) -> list[_ScikitModel]: + overall_estimators = [] + for cfe in cfes: + if cfe._overall_estimator is None: + raise ValueError( + "To use this functionality the overall models need to be fitted." + ) + overall_estimators.append(cfe._overall_estimator) + return overall_estimators + + def _evaluate_model_kind( cfes: Sequence[CrossFitEstimator], Xs: Sequence[Matrix], @@ -1183,10 +1194,15 @@ def _validate_feature_set_none(self): "as feature set (and therefore use all the features)." 
) - @classmethod @abstractmethod - def _necessary_onnx_models(cls) -> set[str]: - """Return a set with the necessary models to convert the MetaLearner to ONNX.""" + def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]: + """Return a dictionary with the necessary models to convert the MetaLearner to + ONNX. + + The keys of the returned dictionary are strings and the values are lists of the + overall base models (trained on the complete dataset) which should be converted + to ONNX. + """ ... @abstractmethod diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index 6c3d500..a43ff8e 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -9,7 +9,7 @@ from sklearn.metrics import root_mean_squared_error from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Scoring, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel from metalearners._utils import ( check_spox_installed, clip_element_absolute_value_to_epsilon, @@ -35,6 +35,7 @@ _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, + get_overall_estimators, ) OUTCOME_MODEL = "outcome_model" @@ -525,9 +526,12 @@ def _pseudo_outcome_and_weights( return pseudo_outcomes, weights - @classmethod - def _necessary_onnx_models(cls) -> set[str]: - return {TREATMENT_MODEL} + def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]: + return { + TREATMENT_MODEL: get_overall_estimators( + self._treatment_models[TREATMENT_MODEL] + ) + } @copydoc(MetaLearner._build_onnx, sep="") def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): @@ -542,7 +546,7 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): from spox import Var, build, inline self._validate_feature_set_none() - self._validate_onnx_models(models, self._necessary_onnx_models()) + self._validate_onnx_models(models, set(self._necessary_onnx_models().keys())) input_dict = 
infer_input_dict(models[TREATMENT_MODEL][0]) diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 250d1f7..e636c0f 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -300,7 +300,7 @@ def predict_conditional_average_outcomes( ) @classmethod - def _necessary_onnx_models(cls) -> set[str]: + def _necessary_onnx_models(cls) -> dict[str, list[_ScikitModel]]: raise ValueError( "The SLearner does not implement this method. Please refer to comment in the tutorial." ) diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 367147d..c009e59 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -7,7 +7,7 @@ from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Scoring, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel from metalearners._utils import ( check_spox_installed, copydoc, @@ -26,6 +26,7 @@ _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, + get_overall_estimators, ) @@ -150,9 +151,12 @@ def evaluate( feature_set=self.feature_set[VARIANT_OUTCOME_MODEL], ) - @classmethod - def _necessary_onnx_models(cls) -> set[str]: - return {VARIANT_OUTCOME_MODEL} + def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]: + return { + VARIANT_OUTCOME_MODEL: get_overall_estimators( + self._nuisance_models[VARIANT_OUTCOME_MODEL] + ) + } @copydoc(MetaLearner._build_onnx, sep="") def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): @@ -167,7 +171,7 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): from spox import build, inline self._validate_feature_set_none() - self._validate_onnx_models(models, self._necessary_onnx_models()) + self._validate_onnx_models(models, set(self._necessary_onnx_models().keys())) input_dict = infer_input_dict(models[VARIANT_OUTCOME_MODEL][0]) diff --git 
a/metalearners/xlearner.py b/metalearners/xlearner.py index fca52ec..5965b96 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -8,7 +8,7 @@ from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Scoring, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel from metalearners._utils import ( check_spox_installed, copydoc, @@ -33,6 +33,7 @@ _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, + get_overall_estimators, ) CONTROL_EFFECT_MODEL = "control_effect_model" @@ -438,9 +439,18 @@ def _pseudo_outcome( return imputed_te_control, imputed_te_treatment - @classmethod - def _necessary_onnx_models(cls) -> set[str]: - return {PROPENSITY_MODEL, CONTROL_EFFECT_MODEL, TREATMENT_EFFECT_MODEL} + def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]: + return { + PROPENSITY_MODEL: get_overall_estimators( + self._nuisance_models[PROPENSITY_MODEL] + ), + CONTROL_EFFECT_MODEL: get_overall_estimators( + self._treatment_models[CONTROL_EFFECT_MODEL] + ), + TREATMENT_EFFECT_MODEL: get_overall_estimators( + self._treatment_models[TREATMENT_EFFECT_MODEL] + ), + } @copydoc(MetaLearner._build_onnx, sep="") def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): @@ -457,7 +467,7 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): from spox import Var, build, inline self._validate_feature_set_none() - self._validate_onnx_models(models, self._necessary_onnx_models()) + self._validate_onnx_models(models, set(self._necessary_onnx_models().keys())) input_dict = infer_input_dict(models[PROPENSITY_MODEL][0]) diff --git a/tests/test_drlearner.py b/tests/test_drlearner.py index 2ccb9cb..7579032 100644 --- a/tests/test_drlearner.py +++ b/tests/test_drlearner.py @@ -7,6 +7,7 @@ import onnxruntime as rt import pytest from lightgbm import LGBMClassifier, LGBMRegressor +from 
onnx import ModelProto from onnxconverter_common.data_types import FloatTensorType from onnxmltools import convert_lightgbm, convert_xgboost from skl2onnx import convert_sklearn @@ -16,7 +17,6 @@ from metalearners import DRLearner from metalearners._typing import Params -from metalearners.metalearner import TREATMENT_MODEL from .conftest import all_sklearn_regressors @@ -94,23 +94,27 @@ def test_drlearner_onnx( ) ml.fit(X, y, w) - onnx_models = [] - for tv in range(n_variants - 1): - model = ml._treatment_models[TREATMENT_MODEL][tv]._overall_estimator - onnx_model = onnx_converter( - model, - initial_types=[ - ( - "X", - FloatTensorType( - [None, n_numerical_features + n_categorical_features] - ), + necessary_models = ml._necessary_onnx_models() + onnx_models: dict[str, list[ModelProto]] = {} + + for model_kind, models in necessary_models.items(): + onnx_models[model_kind] = [] + for model in models: + onnx_models[model_kind].append( + onnx_converter( + model, + initial_types=[ + ( + "X", + FloatTensorType( + [None, n_numerical_features + n_categorical_features] + ), + ) + ], ) - ], - ) - onnx_models.append(onnx_model) + ) - final = ml._build_onnx({TREATMENT_MODEL: onnx_models}) + final = ml._build_onnx(onnx_models) sess = rt.InferenceSession( final.SerializeToString(), providers=rt.get_available_providers() ) diff --git a/tests/test_rlearner.py b/tests/test_rlearner.py index c35a508..0455f60 100644 --- a/tests/test_rlearner.py +++ b/tests/test_rlearner.py @@ -8,6 +8,7 @@ import pandas as pd import pytest from lightgbm import LGBMClassifier, LGBMRegressor +from onnx import ModelProto from onnxconverter_common.data_types import FloatTensorType from onnxmltools import convert_lightgbm, convert_xgboost from skl2onnx import convert_sklearn @@ -15,7 +16,6 @@ from xgboost import XGBRegressor from metalearners._utils import function_has_argument -from metalearners.metalearner import TREATMENT_MODEL from metalearners.rlearner import RLearner, r_loss from .conftest import 
all_sklearn_regressors @@ -99,23 +99,27 @@ def test_rlearner_onnx( ) ml.fit(X, y, w) - onnx_models = [] - for tv in range(n_variants - 1): - model = ml._treatment_models[TREATMENT_MODEL][tv]._overall_estimator - onnx_model = onnx_converter( - model, - initial_types=[ - ( - "X", - FloatTensorType( - [None, n_numerical_features + n_categorical_features] - ), + necessary_models = ml._necessary_onnx_models() + onnx_models: dict[str, list[ModelProto]] = {} + + for model_kind, models in necessary_models.items(): + onnx_models[model_kind] = [] + for model in models: + onnx_models[model_kind].append( + onnx_converter( + model, + initial_types=[ + ( + "X", + FloatTensorType( + [None, n_numerical_features + n_categorical_features] + ), + ) + ], ) - ], - ) - onnx_models.append(onnx_model) + ) - final = ml._build_onnx({TREATMENT_MODEL: onnx_models}) + final = ml._build_onnx(onnx_models) sess = rt.InferenceSession( final.SerializeToString(), providers=rt.get_available_providers() ) diff --git a/tests/test_tlearner.py b/tests/test_tlearner.py index 5732673..9c8d33f 100644 --- a/tests/test_tlearner.py +++ b/tests/test_tlearner.py @@ -8,6 +8,7 @@ import onnxruntime as rt import pytest from lightgbm import LGBMClassifier, LGBMRegressor +from onnx import ModelProto from onnxconverter_common.data_types import FloatTensorType from onnxmltools import convert_lightgbm, convert_xgboost from skl2onnx.convert import convert_sklearn @@ -23,7 +24,6 @@ from metalearners import TLearner from metalearners._typing import Params -from metalearners.metalearner import VARIANT_OUTCOME_MODEL from .conftest import all_sklearn_classifiers, all_sklearn_regressors @@ -100,23 +100,27 @@ def test_tlearner_onnx( ) ml.fit(X, y, w) - onnx_models = [] - for tv in range(n_variants): - model = ml._nuisance_models[VARIANT_OUTCOME_MODEL][tv]._overall_estimator - onnx_model = onnx_converter( - model, - initial_types=[ - ( - "X", - FloatTensorType( - [None, n_numerical_features + n_categorical_features] - ), + 
necessary_models = ml._necessary_onnx_models() + onnx_models: dict[str, list[ModelProto]] = {} + + for model_kind, models in necessary_models.items(): + onnx_models[model_kind] = [] + for model in models: + onnx_models[model_kind].append( + onnx_converter( + model, + initial_types=[ + ( + "X", + FloatTensorType( + [None, n_numerical_features + n_categorical_features] + ), + ) + ], ) - ], - ) - onnx_models.append(onnx_model) + ) - final = ml._build_onnx({VARIANT_OUTCOME_MODEL: onnx_models}) + final = ml._build_onnx(onnx_models) sess = rt.InferenceSession( final.SerializeToString(), providers=rt.get_available_providers() ) diff --git a/tests/test_xlearner.py b/tests/test_xlearner.py index 107310e..64e2239 100644 --- a/tests/test_xlearner.py +++ b/tests/test_xlearner.py @@ -6,10 +6,10 @@ from itertools import repeat import numpy as np -import onnx import onnxruntime as rt import pytest from lightgbm import LGBMClassifier, LGBMRegressor +from onnx import ModelProto from onnxconverter_common.data_types import FloatTensorType from onnxmltools import convert_lightgbm, convert_xgboost from skl2onnx import convert_sklearn @@ -18,7 +18,6 @@ from metalearners import XLearner from metalearners._typing import Params -from metalearners.metalearner import PROPENSITY_MODEL from metalearners.xlearner import CONTROL_EFFECT_MODEL, TREATMENT_EFFECT_MODEL from .conftest import all_sklearn_classifiers, all_sklearn_regressors @@ -102,30 +101,27 @@ def test_xlearner_onnx( ) ml.fit(X, y, w) - onnx_models: dict[str, list[onnx.ModelProto]] = { - CONTROL_EFFECT_MODEL: [], - TREATMENT_EFFECT_MODEL: [], - PROPENSITY_MODEL: [], - } - for tv in range(n_variants - 1): - model = ml._treatment_models[CONTROL_EFFECT_MODEL][tv]._overall_estimator - onnx_model = treatment_onnx_converter( - model, initial_types=[("X", FloatTensorType([None, n_numerical_features]))] - ) - onnx_models[CONTROL_EFFECT_MODEL].append(onnx_model) - - model = ml._treatment_models[TREATMENT_EFFECT_MODEL][tv]._overall_estimator - 
onnx_model = treatment_onnx_converter( - model, initial_types=[("X", FloatTensorType([None, n_numerical_features]))] - ) - onnx_models[TREATMENT_EFFECT_MODEL].append(onnx_model) + necessary_models = ml._necessary_onnx_models() + onnx_models: dict[str, list[ModelProto]] = {} - model = ml._nuisance_models[PROPENSITY_MODEL][0]._overall_estimator - onnx_model = propensity_onnx_converter( - model, - initial_types=[("X", FloatTensorType([None, n_numerical_features]))], - ) - onnx_models[PROPENSITY_MODEL].append(onnx_model) + for model_kind, models in necessary_models.items(): + onnx_models[model_kind] = [] + if model_kind in [CONTROL_EFFECT_MODEL, TREATMENT_EFFECT_MODEL]: + onnx_converter = treatment_onnx_converter + else: + onnx_converter = propensity_onnx_converter + for model in models: + onnx_models[model_kind].append( + onnx_converter( + model, + initial_types=[ + ( + "X", + FloatTensorType([None, n_numerical_features]), + ) + ], + ) + ) final = ml._build_onnx(onnx_models)