Skip to content

Commit

Permalink
Refactor code
Browse files Browse the repository at this point in the history
  • Loading branch information
FrancescMartiEscofetQC committed Jul 25, 2024
1 parent 91cf94b commit 8022aa1
Show file tree
Hide file tree
Showing 11 changed files with 157 additions and 125 deletions.
44 changes: 15 additions & 29 deletions docs/examples/example_onnx.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@
"### Converting the base models to ONNX\n",
"\n",
"Before being able to convert the MetaLearner to ONNX we need to manually convert the necessary\n",
"base models for the prediction. To get a list of the necessary base models that need to be\n",
"base models for the prediction. To get the necessary base models that need to be\n",
"converted we can use :meth:`~metalearners.MetaLearner._necessary_onnx_models`."
]
},
Expand All @@ -156,7 +156,8 @@
"metadata": {},
"outputs": [],
"source": [
"xlearner._necessary_onnx_models()"
"necessary_models = xlearner._necessary_onnx_models()\n",
"necessary_models"
]
},
{
Expand Down Expand Up @@ -185,33 +186,18 @@
"from onnxmltools import convert_lightgbm\n",
"from onnxconverter_common.data_types import FloatTensorType\n",
"\n",
"onnx_models: dict[str, list[onnx.ModelProto]] = {\n",
" \"control_effect_model\": [],\n",
" \"treatment_effect_model\": [],\n",
" \"propensity_model\": [],\n",
"}\n",
"\n",
"for model in xlearner._nuisance_models[\"propensity_model\"]:\n",
" onnx_model = convert_lightgbm(\n",
" model._overall_estimator,\n",
" initial_types=[(\"X\", FloatTensorType([None, len(feature_columns)]))],\n",
" zipmap=False,\n",
" )\n",
" onnx_models[\"propensity_model\"].append(onnx_model)\n",
"\n",
"for model in xlearner._treatment_models[\"control_effect_model\"]:\n",
" onnx_model = convert_lightgbm(\n",
" model._overall_estimator,\n",
" initial_types=[(\"X\", FloatTensorType([None, len(feature_columns)]))],\n",
" )\n",
" onnx_models[\"control_effect_model\"].append(onnx_model)\n",
"\n",
"for model in xlearner._treatment_models[\"treatment_effect_model\"]:\n",
" onnx_model = convert_lightgbm(\n",
" model._overall_estimator,\n",
" initial_types=[(\"X\", FloatTensorType([None, len(feature_columns)]))],\n",
" )\n",
" onnx_models[\"treatment_effect_model\"].append(onnx_model)"
"onnx_models: dict[str, list[onnx.ModelProto]] = {}\n",
"\n",
"for model_kind, models in necessary_models.items():\n",
" onnx_models[model_kind] = []\n",
" for model in models:\n",
" onnx_models[model_kind].append(\n",
" convert_lightgbm(\n",
" model,\n",
" initial_types=[(\"X\", FloatTensorType([None, len(feature_columns)]))],\n",
" zipmap=False,\n",
" )\n",
" )"
]
},
{
Expand Down
12 changes: 8 additions & 4 deletions metalearners/drlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
_fit_cross_fit_estimator_joblib,
_ModelSpecifications,
_ParallelJoblibSpecification,
get_overall_estimators,
)

_EPSILON = 1e-09
Expand Down Expand Up @@ -405,9 +406,12 @@ def _pseudo_outcome(

return pseudo_outcome

@classmethod
def _necessary_onnx_models(cls) -> set[str]:
return {TREATMENT_MODEL}
def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]:
return {
TREATMENT_MODEL: get_overall_estimators(
self._treatment_models[TREATMENT_MODEL]
)
}

@copydoc(MetaLearner._build_onnx, sep="")
def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"):
Expand All @@ -422,7 +426,7 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"):
from spox import Var, build, inline

self._validate_feature_set_none()
self._validate_onnx_models(models, self._necessary_onnx_models())
self._validate_onnx_models(models, set(self._necessary_onnx_models().keys()))

input_dict = infer_input_dict(models[TREATMENT_MODEL][0])

Expand Down
22 changes: 19 additions & 3 deletions metalearners/metalearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,17 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None:
raise ValueError("Need at least two folds to use synchronization.")


def get_overall_estimators(cfes: list[CrossFitEstimator]) -> list[_ScikitModel]:
overall_estimators = []
for cfe in cfes:
if cfe._overall_estimator is None:
raise ValueError(

Check warning on line 145 in metalearners/metalearner.py

View check run for this annotation

Codecov / codecov/patch

metalearners/metalearner.py#L145

Added line #L145 was not covered by tests
"To use this functionality the overall models need to be fitted."
)
overall_estimators.append(cfe._overall_estimator)
return overall_estimators


def _evaluate_model_kind(
cfes: Sequence[CrossFitEstimator],
Xs: Sequence[Matrix],
Expand Down Expand Up @@ -1183,10 +1194,15 @@ def _validate_feature_set_none(self):
"as feature set (and therefore use all the features)."
)

@classmethod
@abstractmethod
def _necessary_onnx_models(cls) -> set[str]:
"""Return a set with the necessary models to convert the MetaLearner to ONNX."""
def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]:
"""Return a dictionary with the necessary models to convert the MetaLearner to
ONNX.
The returned dictionary keys will be strings and the values will be list of the
overall base models (trained on the complete dataset) which should be converted
to onnx.
"""
...

Check warning on line 1206 in metalearners/metalearner.py

View check run for this annotation

Codecov / codecov/patch

metalearners/metalearner.py#L1206

Added line #L1206 was not covered by tests

@abstractmethod
Expand Down
14 changes: 9 additions & 5 deletions metalearners/rlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sklearn.metrics import root_mean_squared_error
from typing_extensions import Self

from metalearners._typing import Matrix, OosMethod, Scoring, Vector
from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel
from metalearners._utils import (
check_spox_installed,
clip_element_absolute_value_to_epsilon,
Expand All @@ -35,6 +35,7 @@
_fit_cross_fit_estimator_joblib,
_ModelSpecifications,
_ParallelJoblibSpecification,
get_overall_estimators,
)

OUTCOME_MODEL = "outcome_model"
Expand Down Expand Up @@ -525,9 +526,12 @@ def _pseudo_outcome_and_weights(

return pseudo_outcomes, weights

@classmethod
def _necessary_onnx_models(cls) -> set[str]:
return {TREATMENT_MODEL}
def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]:
return {
TREATMENT_MODEL: get_overall_estimators(
self._treatment_models[TREATMENT_MODEL]
)
}

@copydoc(MetaLearner._build_onnx, sep="")
def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"):
Expand All @@ -542,7 +546,7 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"):
from spox import Var, build, inline

self._validate_feature_set_none()
self._validate_onnx_models(models, self._necessary_onnx_models())
self._validate_onnx_models(models, set(self._necessary_onnx_models().keys()))

input_dict = infer_input_dict(models[TREATMENT_MODEL][0])

Expand Down
2 changes: 1 addition & 1 deletion metalearners/slearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def predict_conditional_average_outcomes(
)

@classmethod
def _necessary_onnx_models(cls) -> set[str]:
def _necessary_onnx_models(cls) -> dict[str, list[_ScikitModel]]:
raise ValueError(

Check warning on line 304 in metalearners/slearner.py

View check run for this annotation

Codecov / codecov/patch

metalearners/slearner.py#L304

Added line #L304 was not covered by tests
"The SLearner does not implement this method. Please refer to comment in the tutorial."
)
Expand Down
14 changes: 9 additions & 5 deletions metalearners/tlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from joblib import Parallel, delayed
from typing_extensions import Self

from metalearners._typing import Matrix, OosMethod, Scoring, Vector
from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel
from metalearners._utils import (
check_spox_installed,
copydoc,
Expand All @@ -26,6 +26,7 @@
_fit_cross_fit_estimator_joblib,
_ModelSpecifications,
_ParallelJoblibSpecification,
get_overall_estimators,
)


Expand Down Expand Up @@ -150,9 +151,12 @@ def evaluate(
feature_set=self.feature_set[VARIANT_OUTCOME_MODEL],
)

@classmethod
def _necessary_onnx_models(cls) -> set[str]:
return {VARIANT_OUTCOME_MODEL}
def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]:
return {
VARIANT_OUTCOME_MODEL: get_overall_estimators(
self._nuisance_models[VARIANT_OUTCOME_MODEL]
)
}

@copydoc(MetaLearner._build_onnx, sep="")
def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"):
Expand All @@ -167,7 +171,7 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"):
from spox import build, inline

self._validate_feature_set_none()
self._validate_onnx_models(models, self._necessary_onnx_models())
self._validate_onnx_models(models, set(self._necessary_onnx_models().keys()))

input_dict = infer_input_dict(models[VARIANT_OUTCOME_MODEL][0])

Expand Down
20 changes: 15 additions & 5 deletions metalearners/xlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from joblib import Parallel, delayed
from typing_extensions import Self

from metalearners._typing import Matrix, OosMethod, Scoring, Vector
from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel
from metalearners._utils import (
check_spox_installed,
copydoc,
Expand All @@ -33,6 +33,7 @@
_fit_cross_fit_estimator_joblib,
_ModelSpecifications,
_ParallelJoblibSpecification,
get_overall_estimators,
)

CONTROL_EFFECT_MODEL = "control_effect_model"
Expand Down Expand Up @@ -438,9 +439,18 @@ def _pseudo_outcome(

return imputed_te_control, imputed_te_treatment

@classmethod
def _necessary_onnx_models(cls) -> set[str]:
return {PROPENSITY_MODEL, CONTROL_EFFECT_MODEL, TREATMENT_EFFECT_MODEL}
def _necessary_onnx_models(self) -> dict[str, list[_ScikitModel]]:
return {
PROPENSITY_MODEL: get_overall_estimators(
self._nuisance_models[PROPENSITY_MODEL]
),
CONTROL_EFFECT_MODEL: get_overall_estimators(
self._treatment_models[CONTROL_EFFECT_MODEL]
),
TREATMENT_EFFECT_MODEL: get_overall_estimators(
self._treatment_models[TREATMENT_EFFECT_MODEL]
),
}

@copydoc(MetaLearner._build_onnx, sep="")
def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"):
Expand All @@ -457,7 +467,7 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"):
from spox import Var, build, inline

self._validate_feature_set_none()
self._validate_onnx_models(models, self._necessary_onnx_models())
self._validate_onnx_models(models, set(self._necessary_onnx_models().keys()))

input_dict = infer_input_dict(models[PROPENSITY_MODEL][0])

Expand Down
36 changes: 20 additions & 16 deletions tests/test_drlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import onnxruntime as rt
import pytest
from lightgbm import LGBMClassifier, LGBMRegressor
from onnx import ModelProto
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools import convert_lightgbm, convert_xgboost
from skl2onnx import convert_sklearn
Expand All @@ -16,7 +17,6 @@

from metalearners import DRLearner
from metalearners._typing import Params
from metalearners.metalearner import TREATMENT_MODEL

from .conftest import all_sklearn_regressors

Expand Down Expand Up @@ -94,23 +94,27 @@ def test_drlearner_onnx(
)
ml.fit(X, y, w)

onnx_models = []
for tv in range(n_variants - 1):
model = ml._treatment_models[TREATMENT_MODEL][tv]._overall_estimator
onnx_model = onnx_converter(
model,
initial_types=[
(
"X",
FloatTensorType(
[None, n_numerical_features + n_categorical_features]
),
necessary_models = ml._necessary_onnx_models()
onnx_models: dict[str, list[ModelProto]] = {}

for model_kind, models in necessary_models.items():
onnx_models[model_kind] = []
for model in models:
onnx_models[model_kind].append(
onnx_converter(
model,
initial_types=[
(
"X",
FloatTensorType(
[None, n_numerical_features + n_categorical_features]
),
)
],
)
],
)
onnx_models.append(onnx_model)
)

final = ml._build_onnx({TREATMENT_MODEL: onnx_models})
final = ml._build_onnx(onnx_models)
sess = rt.InferenceSession(
final.SerializeToString(), providers=rt.get_available_providers()
)
Expand Down
36 changes: 20 additions & 16 deletions tests/test_rlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
import pandas as pd
import pytest
from lightgbm import LGBMClassifier, LGBMRegressor
from onnx import ModelProto
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools import convert_lightgbm, convert_xgboost
from skl2onnx import convert_sklearn
from sklearn.linear_model import LinearRegression, LogisticRegression
from xgboost import XGBRegressor

from metalearners._utils import function_has_argument
from metalearners.metalearner import TREATMENT_MODEL
from metalearners.rlearner import RLearner, r_loss

from .conftest import all_sklearn_regressors
Expand Down Expand Up @@ -99,23 +99,27 @@ def test_rlearner_onnx(
)
ml.fit(X, y, w)

onnx_models = []
for tv in range(n_variants - 1):
model = ml._treatment_models[TREATMENT_MODEL][tv]._overall_estimator
onnx_model = onnx_converter(
model,
initial_types=[
(
"X",
FloatTensorType(
[None, n_numerical_features + n_categorical_features]
),
necessary_models = ml._necessary_onnx_models()
onnx_models: dict[str, list[ModelProto]] = {}

for model_kind, models in necessary_models.items():
onnx_models[model_kind] = []
for model in models:
onnx_models[model_kind].append(
onnx_converter(
model,
initial_types=[
(
"X",
FloatTensorType(
[None, n_numerical_features + n_categorical_features]
),
)
],
)
],
)
onnx_models.append(onnx_model)
)

final = ml._build_onnx({TREATMENT_MODEL: onnx_models})
final = ml._build_onnx(onnx_models)
sess = rt.InferenceSession(
final.SerializeToString(), providers=rt.get_available_providers()
)
Expand Down
Loading

0 comments on commit 8022aa1

Please sign in to comment.