From e8b64e6bcfc1ba811dc45485eac5e466b4bb856f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:53:47 +0200 Subject: [PATCH 01/59] Speedup tests Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- tests/conftest.py | 80 +++++++++++++++++++++++++-------------- tests/test_learner.py | 11 +++--- tests/test_metalearner.py | 11 +++--- 3 files changed, 63 insertions(+), 39 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 449ab24..862131b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -75,8 +75,9 @@ def mindset_data(): return load_mindset_data() -@pytest.fixture(scope="function") -def twins_data(rng): +@pytest.fixture(scope="session") +def twins_data(): + rng = np.random.default_rng(_SEED) ( chosen_df, outcome_column, @@ -94,28 +95,30 @@ def twins_data(rng): ) -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def n_numericals(): return 25 -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def n_categoricals(): return 5 -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def sample_size(): return 100_000 -@pytest.fixture(scope="function") -def numerical_covariates(sample_size, n_numericals, rng): +@pytest.fixture(scope="session") +def numerical_covariates(sample_size, n_numericals): + rng = np.random.default_rng(_SEED) return generate_covariates(sample_size, n_numericals, format="numpy", rng=rng) -@pytest.fixture(scope="function") -def mixed_covariates(sample_size, n_numericals, n_categoricals, rng): +@pytest.fixture(scope="session") +def mixed_covariates(sample_size, n_numericals, n_categoricals): + rng = np.random.default_rng(_SEED) return generate_covariates( sample_size, n_numericals + n_categoricals, @@ -125,52 +128,72 @@ def mixed_covariates(sample_size, n_numericals, n_categoricals, rng): ) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def numerical_experiment_dataset_continuous_outcome_binary_treatment_linear_te( - numerical_covariates, rng + sample_size, n_numericals ): - covariates, _, _ = numerical_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, n_numericals, format="numpy", rng=rng + ) return _generate_rct_experiment_data(covariates, False, rng, 0.3, None) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def numerical_experiment_dataset_binary_outcome_binary_treatment_linear_te( - numerical_covariates, rng + sample_size, n_numericals ): - covariates, _, _ = numerical_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, n_numericals, format="numpy", rng=rng + ) return _generate_rct_experiment_data(covariates, True, rng, 0.3, None) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def mixed_experiment_dataset_continuous_outcome_binary_treatment_linear_te( - mixed_covariates, rng + sample_size, n_numericals, n_categoricals ): - covariates, _, _ = mixed_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, + n_numericals + n_categoricals, + n_categoricals=n_categoricals, + format="pandas", + rng=rng, + ) return _generate_rct_experiment_data(covariates, False, rng, 0.3, None) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def numerical_experiment_dataset_continuous_outcome_multi_treatment_linear_te( - numerical_covariates, rng + sample_size, 
n_numericals ): - covariates, _, _ = numerical_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, n_numericals, format="numpy", rng=rng + ) return _generate_rct_experiment_data( covariates, False, rng, [0.2, 0.1, 0.3, 0.15, 0.25], None ) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def numerical_experiment_dataset_continuous_outcome_multi_treatment_constant_te( - numerical_covariates, rng + sample_size, n_numericals ): - covariates, _, _ = numerical_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, n_numericals, format="numpy", rng=rng + ) return _generate_rct_experiment_data( covariates, False, rng, [0.2, 0.1, 0.3, 0.15, 0.25], np.array([-2, 5, 0, 3]) ) -@pytest.fixture -def dummy_dataset(rng): +@pytest.fixture(scope="session") +def dummy_dataset(): + rng = np.random.default_rng(_SEED) sample_size = 100 n_features = 10 X = rng.standard_normal((sample_size, n_features)) @@ -179,8 +202,9 @@ def dummy_dataset(rng): return X, y, w -@pytest.fixture(scope="function") -def feature_importance_dataset(rng): +@pytest.fixture(scope="session") +def feature_importance_dataset(): + rng = np.random.default_rng(_SEED) n_samples = 10000 x0 = rng.normal(10, 1, n_samples) x1 = rng.normal(2, 1, n_samples) diff --git a/tests/test_learner.py b/tests/test_learner.py index e76018e..e40a167 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -312,9 +312,8 @@ def test_learner_twins(metalearner, reference_value, twins_data, rng): @pytest.mark.parametrize("n_classes", [2, 5, 10]) @pytest.mark.parametrize("n_variants", [2, 5]) @pytest.mark.parametrize("is_classification", [True, False]) -def test_learner_evaluate( - metalearner, is_classification, rng, sample_size, n_classes, n_variants -): +def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_variants): + sample_size = 1000 factory = metalearner_factory(metalearner) if n_variants > 2 and not factory._supports_multi_treatment(): pytest.skip() @@ -617,8 +616,9 @@ def test_conditional_average_outcomes_smoke( @pytest.mark.parametrize("n_classes", [5, 10]) @pytest.mark.parametrize("n_variants", [2, 5]) def test_conditional_average_outcomes_smoke_multi_class( - metalearner_prefix, rng, sample_size, n_classes, n_variants + metalearner_prefix, rng, n_classes, n_variants ): + sample_size = 1000 factory = metalearner_factory(metalearner_prefix) X = rng.standard_normal((sample_size, 10)) @@ -648,8 +648,9 @@ def test_conditional_average_outcomes_smoke_multi_class( @pytest.mark.parametrize("n_variants", [2, 5]) @pytest.mark.parametrize("is_classification", [True, False]) def test_predict_smoke( - metalearner_prefix, is_classification, rng, sample_size, n_classes, n_variants + metalearner_prefix, is_classification, rng, n_classes, n_variants ): + sample_size = 1000 factory = metalearner_factory(metalearner_prefix) if n_variants > 2 and not factory._supports_multi_treatment(): pytest.skip() diff --git a/tests/test_metalearner.py b/tests/test_metalearner.py index 9de1af3..2e89913 100644 --- a/tests/test_metalearner.py +++ b/tests/test_metalearner.py @@ -152,7 +152,7 @@ def test_metalearner_init( @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_categorical( mixed_experiment_dataset_continuous_outcome_binary_treatment_linear_te, @@ -198,7 +198,7 @@ def 
test_metalearner_categorical( @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_missing_data_smoke( mixed_experiment_dataset_continuous_outcome_binary_treatment_linear_te, @@ -227,7 +227,7 @@ def test_metalearner_missing_data_smoke( @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_missing_data_error( numerical_experiment_dataset_continuous_outcome_binary_treatment_linear_te, @@ -258,7 +258,7 @@ def test_metalearner_missing_data_error( @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_format_consistent( numerical_experiment_dataset_continuous_outcome_binary_treatment_linear_te, @@ -345,7 +345,7 @@ def test_n_folds(n_folds): @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_model_names(implementation): set1 = set(implementation.nuisance_model_specifications().keys()) @@ -702,7 +702,6 @@ def test_fit_params_rlearner_error(dummy_dataset): @pytest.mark.parametrize( "implementation, needs_estimates", [ - (_TestMetaLearner, True), (TLearner, True), (SLearner, True), (XLearner, True), From 7a11445b3b1f0c607221ac5b56aacac74e0a35da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:58:48 +0200 Subject: [PATCH 02/59] Switch `strict` meaning in `validate_number_positive` --- metalearners/_utils.py | 19 +++++++++++++------ metalearners/cross_fit_estimator.py | 2 +- tests/test_cross_fit_estimator.py | 4 +++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 00eecfb..0aca691 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -1,7 +1,6 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -import operator from collections.abc import Callable from inspect import signature from operator import le, lt @@ -66,14 +65,22 @@ def validate_all_vectors_same_index(*args: Vector) -> None: def validate_number_positive( - value: int | float, name: str, strict: bool = False + value: int | float, name: str, strict: bool = True ) -> None: + """Validates that a number is positive. + + If ``strict = True`` then it validates that the number is strictly positive. + """ if strict: - comparison = operator.lt + if value <= 0: + raise ValueError( + f"{name} was expected to be strictly positive but was {value}." + ) else: - comparison = operator.le - if comparison(value, 0): - raise ValueError(f"{name} was expected to be positive but was {value}.") + if value < 0: + raise ValueError( + f"{name} was expected to be positive or zero but was {value}." 
+            )


 def check_propensity_score(
diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py
index e26d898..9765aa7 100644
--- a/metalearners/cross_fit_estimator.py
+++ b/metalearners/cross_fit_estimator.py
@@ -56,7 +56,7 @@ def _validate_data_match_prior_split(
 ) -> None:
     """Validate whether the previous test_indices and the passed data are based on
     the same number of observations."""
-    validate_number_positive(n_observations, "n_observations", strict=False)
+    validate_number_positive(n_observations, "n_observations", strict=True)
     if test_indices is None:
         return
     expected_n_observations = sum(len(x) for x in test_indices)
diff --git a/tests/test_cross_fit_estimator.py b/tests/test_cross_fit_estimator.py
index 8e34b00..bb102c5 100644
--- a/tests/test_cross_fit_estimator.py
+++ b/tests/test_cross_fit_estimator.py
@@ -223,7 +223,9 @@ def test_crossfitestimator_n_folds_1(rng, sample_size):
 )
 def test_validate_data_match(n_observations, test_indices, success):
     if n_observations < 1:
-        with pytest.raises(ValueError, match="was expected to be positive"):
+        with pytest.raises(
+            ValueError, match=r"was expected to be (strictly )?positive"
+        ):
             _validate_data_match_prior_split(n_observations, test_indices)
         return
     if success:

From 642cb2e21b2ddb13dd6f7fa7acc63df0db43e2a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?=
Date: Fri, 14 Jun 2024 13:26:40 +0200
Subject: [PATCH 03/59] Add classes_ to cfe

---
 metalearners/cross_fit_estimator.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py
index e26d898..97ef105 100644
--- a/metalearners/cross_fit_estimator.py
+++ b/metalearners/cross_fit_estimator.py
@@ -101,6 +101,7 @@ class CrossFitEstimator:
     _overall_estimator: _ScikitModel | None = field(init=False)
     _test_indices: tuple[np.ndarray] | None = field(init=False)
     _n_classes: int | None = field(init=False)
+    classes_: np.ndarray | None = field(init=False)

     def __post_init__(self):
         _validate_n_folds(self.n_folds)
@@ -115,6 +116,7 @@ def __post_init__(self):
         self._overall_estimator: _ScikitModel | None = None
         self._test_indices: tuple[np.ndarray] | None = None
         self._n_classes: int | None = None
+        self.classes_: np.ndarray | None = None

     def _train_overall_estimator(
         self, X: Matrix, y: Matrix | Vector, fit_params: dict | None = None
@@ -189,7 +191,14 @@ def fit(

         if is_classifier(self):
             self._n_classes = len(np.unique(y))
-
+            self.classes_ = np.unique(y)
+            for e in self._estimators:
+                if set(e.classes_) != set(self.classes_):  # type: ignore
+                    raise ValueError(
+                        "Some cross-fit estimators' training data contained fewer classes than "
+                        "the overall estimator's. Please check the cv parameter. If you are "
+                        "synchronizing the folds in a MetaLearner, consider not doing so."
+ ) return self def _initialize_prediction_tensor( From d7cef73591065382e91673b106c25fe02a6b0f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 13 Jun 2024 08:41:21 +0200 Subject: [PATCH 04/59] Fix RLoss calculation in evaluate --- metalearners/rlearner.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index bf39caa..c139dd9 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -335,6 +335,10 @@ def evaluate( treatment_evaluation = {} tau_hat = self.predict(X=X, is_oos=is_oos, oos_method=oos_method) for treatment_variant in range(1, self.n_variants): + is_treatment = w == treatment_variant + is_control = w == 0 + mask = is_treatment | is_control + propensity_estimates = w_hat[:, treatment_variant] / ( w_hat[:, 0] + w_hat[:, treatment_variant] ) @@ -344,11 +348,11 @@ def evaluate( else tau_hat[:, treatment_variant - 1, 0] ) treatment_evaluation[f"r_loss_{treatment_variant}_vs_0"] = r_loss( - cate_estimates=cate_estimates, - outcome_estimates=y_hat, - propensity_scores=propensity_estimates, - outcomes=y, - treatments=w, + cate_estimates=cate_estimates[mask], + outcome_estimates=y_hat[mask], + propensity_scores=propensity_estimates[mask], + outcomes=y[mask], + treatments=w[mask] == treatment_variant, ) return propensity_evaluation | outcome_evaluation | treatment_evaluation From 963debfc22597fa32a6a8245a43c7a90f133675b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 14:43:05 +0200 Subject: [PATCH 05/59] Parametrize evaluate --- metalearners/_utils.py | 17 +++ metalearners/cross_fit_estimator.py | 9 +- metalearners/drlearner.py | 79 +++++++++++-- metalearners/metalearner.py | 84 +++++++++++++- metalearners/rlearner.py | 64 ++++++++--- metalearners/slearner.py | 35 ++++-- metalearners/tlearner.py | 43 ++++--- metalearners/xlearner.py | 78 ++++++++++++- tests/test_learner.py | 167 +++++++++++++++++++++++++--- 9 files changed, 503 insertions(+), 73 deletions(-) diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 0aca691..1d82b7b 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -463,3 +463,20 @@ def simplify_output_2d(tensor: np.ndarray) -> np.ndarray: "This function requires a regression or a classification with binary outcome " "task." 
) + + +# Taken from https://stackoverflow.com/questions/13741998/is-there-a-way-to-let-classes-inherit-the-documentation-of-their-superclass-with +def copydoc(fromfunc, sep="\n"): + """ + Decorator: Copy the docstring of `fromfunc` + """ + + def _decorator(func): + sourcedoc = fromfunc.__doc__ + if func.__doc__ is None: + func.__doc__ = sourcedoc + else: + func.__doc__ = sep.join([sourcedoc, func.__doc__]) + return func + + return _decorator diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py index 889f20c..0cccb8a 100644 --- a/metalearners/cross_fit_estimator.py +++ b/metalearners/cross_fit_estimator.py @@ -362,12 +362,17 @@ def __init__( self.original_predict_proba = model.predict_proba def __enter__(self): - self.model.predict = partial( # type: ignore + new_predict = partial( self.model.predict, is_oos=self.is_oos, oos_method=self.oos_method ) - self.model.predict_proba = partial( # type: ignore + new_predict.__name__ = "predict" # type: ignore + self.model.predict = new_predict # type: ignore + + new_predict_proba = partial( self.model.predict_proba, is_oos=self.is_oos, oos_method=self.oos_method ) + new_predict_proba.__name__ = "predict_proba" # type: ignore + self.model.predict_proba = new_predict_proba # type: ignore return self.model def __exit__(self, *args): diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index ea9b2f1..93b86a5 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -1,6 +1,8 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause +from collections.abc import Callable, Mapping + import numpy as np from joblib import Parallel, delayed from typing_extensions import Self @@ -23,6 +25,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, + _evaluate_model, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -148,6 +151,7 @@ def fit( w=w, y=y, treatment_variant=treatment_variant, + is_oos=False, ) treatment_jobs.append( @@ -205,37 +209,90 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - raise NotImplementedError( - "This feature is not yet implemented for the DR-Learner." 
+ scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + masks = [] + for tv in range(self.n_variants): + masks.append(w == tv) + variant_outcome_evaluation = _evaluate_model( + cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], + X=[X[w == tv] for tv in range(self.n_variants)], + y=[y[w == tv] for tv in range(self.n_variants)], + scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + model_kind=VARIANT_OUTCOME_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, + ) + + propensity_evaluation = _evaluate_model( + cfes=self._nuisance_models[PROPENSITY_MODEL], + X=[X], + y=[w], + scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + model_kind=PROPENSITY_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, ) + pseudo_outcome: list[np.ndarray] = [] + for treatment_variant in range(1, self.n_variants): + tv_pseudo_outcome = self._pseudo_outcome( + X=X, + y=y, + w=w, + treatment_variant=treatment_variant, + is_oos=is_oos, + oos_method=oos_method, + ) + pseudo_outcome.append(tv_pseudo_outcome) + + treatment_evaluation = _evaluate_model( + self._treatment_models[TREATMENT_MODEL], + X=[X for _ in range(1, self.n_variants)], + y=pseudo_outcome, + scorers=scoring.get(TREATMENT_MODEL, ["neg_root_mean_squared_error"]), + model_kind=TREATMENT_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=True, + ) + + return variant_outcome_evaluation | propensity_evaluation | treatment_evaluation + def _pseudo_outcome( self, X: Matrix, y: Vector, w: Vector, treatment_variant: int, + is_oos: bool, + oos_method: OosMethod = OVERALL, epsilon: float = _EPSILON, ) -> np.ndarray: - """Compute the DR-Learner pseudo outcome. - - Importantly, this method assumes to be applied on in-sample data. - In other words, ``is_oos`` will always be set to ``False`` when calling - ``predict_nuisance``. 
- """ + """Compute the DR-Learner pseudo outcome.""" validate_valid_treatment_variant_not_control(treatment_variant, self.n_variants) conditional_average_outcome_estimates = ( self.predict_conditional_average_outcomes( X=X, - is_oos=False, + is_oos=is_oos, + oos_method=oos_method, ) ) propensity_estimates = self.predict_nuisance( X=X, - is_oos=False, + is_oos=is_oos, + oos_method=oos_method, model_kind=PROPENSITY_MODEL, model_ord=0, ) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 945c791..90ccda8 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: BSD-3-Clause from abc import ABC, abstractmethod -from collections.abc import Callable, Collection +from collections.abc import Callable, Collection, Mapping, Sequence from copy import deepcopy from dataclasses import dataclass from typing import TypedDict @@ -10,6 +10,7 @@ import numpy as np import pandas as pd import shap +from sklearn.metrics import get_scorer from sklearn.model_selection import KFold from typing_extensions import Self @@ -32,6 +33,7 @@ from metalearners.cross_fit_estimator import ( OVERALL, CrossFitEstimator, + _PredictContext, ) from metalearners.explainer import Explainer @@ -133,6 +135,41 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: raise ValueError("Need at least two folds to use synchronization.") +def _evaluate_model( + cfes: Sequence[CrossFitEstimator], + X: Sequence[Matrix], + y: Sequence[Vector], + scorers: Sequence[str | Callable], + model_kind: str, + is_oos: bool, + is_treatment: bool, + oos_method: OosMethod = OVERALL, +) -> dict[str, float]: + """Helper function to evaluate all the models of the same model kind.""" + prefix = f"{model_kind}_" + evaluation_metrics: dict[str, float] = {} + for idx, scorer in enumerate(scorers): + if isinstance(scorer, str): + scorer_str = scorer + scorer_call: Callable = get_scorer(scorer) + else: + scorer_str = f"custom_scorer_{idx}" + scorer_call = scorer + for i, cfe in enumerate(cfes): + if is_treatment: + treatment_variant = i + 1 + index_str = f"{treatment_variant}_vs_0_" + else: + if len(cfes) == 1: + index_str = "" + else: + index_str = f"{i}_" + name = f"{prefix}{index_str}{scorer_str}" + with _PredictContext(cfe, is_oos, oos_method) as modified_cfe: + evaluation_metrics[name] = scorer_call(modified_cfe, X[i], y[i]) + return evaluation_metrics + + class _ModelSpecifications(TypedDict): # The quotes on MetaLearner are necessary for type hinting as it's not yet defined # here. Check https://stackoverflow.com/questions/55320236/does-python-evaluate-type-hinting-of-a-forward-reference @@ -311,6 +348,16 @@ def _validate_models(self) -> None: factory, predict_method, name=f"treatment model {model_kind}" ) + @classmethod + def _validate_scoring(cls, scoring: Mapping[str, list[str | Callable]]): + if not set(scoring.keys()) <= ( + set(cls.nuisance_model_specifications().keys()) + | set(cls.treatment_model_specifications().keys()) + ): + raise ValueError( + "scoring dict keys need to be a subset of the model names in the MetaLearner" + ) + def _qualified_fit_params( self, fit_params: None | dict, @@ -824,8 +871,39 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - """Evaluate all models contained in a MetaLearner.""" + scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + r"""Evaluate the models models contained in the MetaLearner. 
+
+        ``scoring`` keys must be a subset of the names of the models contained in the
+        MetaLearner; for information about these names, check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications`
+        and :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`.
+        The values must be a list of:
+
+        * ``string`` representing a ``sklearn`` scoring method. Check
+          `here `__
+          for the possible values.
+        * ``Callable`` with signature ``scorer(estimator, X, y_true, **kwargs)``. We recommend
+          using `sklearn.metrics.make_scorer `_
+          to create these callables.
+
+        If some model name is not present in the keys of ``scoring``, the metric used by
+        default will be ``neg_log_loss`` if it is a classifier and
+        ``neg_root_mean_squared_error`` if it is a regressor.
+
+        The returned dictionary keys have the following structure:
+
+        * For nuisance models:
+
+          * If the cardinality is one: ``f"{model_kind}_{scorer}"``
+          * If there is one model for each treatment variant (including control):
+            ``f"{model_kind}_{treatment_variant}_{scorer}"``
+
+        * For treatment models: ``f"{model_kind}_{treatment_variant}_vs_0_{scorer}"``
+
+        Where ``scorer`` is the name of the scorer if it is a string, and ``"custom_scorer_{idx}"``
+        if it is a callable, where ``idx`` is its index in the ``scorers`` list.
+        """
         ...

     def explainer(
diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py
index 6a09847..349fbea 100644
--- a/metalearners/rlearner.py
+++ b/metalearners/rlearner.py
@@ -1,14 +1,17 @@
 # # Copyright (c) QuantCo 2024-2024
 # # SPDX-License-Identifier: BSD-3-Clause

+from collections.abc import Callable, Mapping
+
 import numpy as np
 from joblib import Parallel, delayed
-from sklearn.metrics import log_loss, root_mean_squared_error
+from sklearn.metrics import root_mean_squared_error
 from typing_extensions import Self

 from metalearners._typing import Matrix, OosMethod, Vector
 from metalearners._utils import (
     clip_element_absolute_value_to_epsilon,
+    copydoc,
     function_has_argument,
     get_one,
     get_predict,
@@ -24,6 +27,7 @@
     TREATMENT,
     TREATMENT_MODEL,
     MetaLearner,
+    _evaluate_model,
     _fit_cross_fit_estimator_joblib,
     _ModelSpecifications,
     _ParallelJoblibSpecification,
@@ -323,6 +327,7 @@ def predict(
         tau_hat[variant_indices, treatment_variant - 1] = variant_estimates
         return tau_hat

+    @copydoc(MetaLearner.evaluate, sep="\n\t")
     def evaluate(
         self,
         X: Matrix,
@@ -330,7 +335,41 @@
         w: Vector,
         is_oos: bool,
         oos_method: OosMethod = OVERALL,
-    ) -> dict[str, float | int]:
+        scoring: Mapping[str, list[str | Callable]] | None = None,
+    ) -> dict[str, float]:
+        """In the RLearner case, the ``"treatment_model"`` is always evaluated with the
+        :func:`~metalearners.rlearner.r_loss` and the ``scoring["treatment_model"]``
+        parameter is ignored."""
+        if scoring is None:
+            scoring = {}
+        self._validate_scoring(scoring=scoring)
+
+        propensity_evaluation = _evaluate_model(
+            cfes=self._nuisance_models[PROPENSITY_MODEL],
+            X=[X],
+            y=[w],
+            scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]),
+            model_kind=PROPENSITY_MODEL,
+            is_oos=is_oos,
+            oos_method=oos_method,
+            is_treatment=False,
+        )
+
+        default_metric = (
+            "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error"
+        )
+        outcome_evaluation = _evaluate_model(
+            cfes=self._nuisance_models[OUTCOME_MODEL],
+            X=[X],
+            y=[y],
+            scorers=scoring.get(OUTCOME_MODEL, [default_metric]),
+            model_kind=OUTCOME_MODEL,
+            is_oos=is_oos,
+            oos_method=oos_method,
+            is_treatment=False,
+        )
+
+        # TODO: improve this?
generalize it to other metalearners? w_hat = self.predict_nuisance( X=X, is_oos=is_oos, @@ -338,7 +377,6 @@ def evaluate( model_kind=PROPENSITY_MODEL, model_ord=0, ) - propensity_evaluation = {"propensity_cross_entropy": log_loss(w, w_hat)} y_hat = self.predict_nuisance( X=X, @@ -350,15 +388,13 @@ def evaluate( if self.is_classification: y_hat = y_hat[:, 1] - outcome_evaluation = ( - {"outcome_log_loss": log_loss(y, y_hat)} - if self.is_classification - else {"outcome_rmse": root_mean_squared_error(y, y_hat)} - ) - treatment_evaluation = {} tau_hat = self.predict(X=X, is_oos=is_oos, oos_method=oos_method) for treatment_variant in range(1, self.n_variants): + is_treatment = w == treatment_variant + is_control = w == 0 + mask = is_treatment | is_control + propensity_estimates = w_hat[:, treatment_variant] / ( w_hat[:, 0] + w_hat[:, treatment_variant] ) @@ -368,11 +404,11 @@ def evaluate( else tau_hat[:, treatment_variant - 1, 0] ) treatment_evaluation[f"r_loss_{treatment_variant}_vs_0"] = r_loss( - cate_estimates=cate_estimates, - outcome_estimates=y_hat, - propensity_scores=propensity_estimates, - outcomes=y, - treatments=w, + cate_estimates=cate_estimates[mask], + outcome_estimates=y_hat[mask], + propensity_scores=propensity_estimates[mask], + outcomes=y[mask], + treatments=w[mask] == treatment_variant, ) return propensity_evaluation | outcome_evaluation | treatment_evaluation diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 553b558..718464a 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -2,10 +2,10 @@ # # SPDX-License-Identifier: BSD-3-Clause import warnings +from collections.abc import Callable, Mapping import numpy as np import pandas as pd -from sklearn.metrics import log_loss, root_mean_squared_error from typing_extensions import Self from metalearners._typing import Matrix, OosMethod, Vector @@ -15,7 +15,12 @@ supports_categoricals, ) from metalearners.cross_fit_estimator import OVERALL -from metalearners.metalearner import NUISANCE, MetaLearner, _ModelSpecifications +from metalearners.metalearner import ( + NUISANCE, + MetaLearner, + _evaluate_model, + _ModelSpecifications, +) _BASE_MODEL = "base_model" @@ -150,17 +155,29 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - # TODO: Parameterize evaluation approaches. 
+ scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + X_with_w = _append_treatment_to_covariates( X, w, self._supports_categoricals, self.n_variants ) - y_pred = self.predict_nuisance( - X=X_with_w, model_kind=_BASE_MODEL, model_ord=0, is_oos=is_oos + return _evaluate_model( + cfes=self._nuisance_models[_BASE_MODEL], + X=[X_with_w], + y=[y], + scorers=scoring.get(_BASE_MODEL, [default_metric]), + model_kind=_BASE_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, ) - if self.is_classification: - return {"cross_entropy": log_loss(y, y_pred)} - return {"rmse": root_mean_squared_error(y, y_pred)} def predict_conditional_average_outcomes( self, X: Matrix, is_oos: bool, oos_method: OosMethod = OVERALL diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 9380144..9a5b8d9 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -2,9 +2,10 @@ # # SPDX-License-Identifier: BSD-3-Clause +from collections.abc import Callable, Mapping + import numpy as np from joblib import Parallel, delayed -from sklearn.metrics import log_loss, root_mean_squared_error from typing_extensions import Self from metalearners._typing import Matrix, OosMethod, Vector @@ -15,6 +16,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, + _evaluate_model, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -114,21 +116,26 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - # TODO: Parametrize evaluation approaches. 
- conditional_average_outcomes = self.predict_conditional_average_outcomes( - X=X, is_oos=is_oos, oos_method=oos_method + scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + + masks = [] + for tv in range(self.n_variants): + masks.append(w == tv) + return _evaluate_model( + cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], + X=[X[w == tv] for tv in range(self.n_variants)], + y=[y[w == tv] for tv in range(self.n_variants)], + scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + model_kind=VARIANT_OUTCOME_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, ) - evaluation_metrics = {} - for treatment_variant in range(self.n_variants): - prefix = f"variant_{treatment_variant}" - variant_outcomes = conditional_average_outcomes[:, treatment_variant] - if self.is_classification: - evaluation_metrics[f"{prefix}_cross_entropy"] = log_loss( - y[w == treatment_variant], variant_outcomes[w == treatment_variant] - ) - else: - evaluation_metrics[f"{prefix}_rmse"] = root_mean_squared_error( - y[w == treatment_variant], variant_outcomes[w == treatment_variant] - ) - return evaluation_metrics diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 729899c..d2b691d 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -1,6 +1,8 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause +from collections.abc import Callable, Mapping + import numpy as np from joblib import Parallel, delayed from typing_extensions import Self @@ -21,6 +23,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, + _evaluate_model, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -285,9 +288,78 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - raise NotImplementedError( - "This feature is not yet implemented for the X-Learner." 
+ scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + masks = [] + for tv in range(self.n_variants): + masks.append(w == tv) + variant_outcome_evaluation = _evaluate_model( + cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], + X=[X[w == tv] for tv in range(self.n_variants)], + y=[y[w == tv] for tv in range(self.n_variants)], + scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + model_kind=VARIANT_OUTCOME_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, + ) + + propensity_evaluation = _evaluate_model( + cfes=self._nuisance_models[PROPENSITY_MODEL], + X=[X], + y=[w], + scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + model_kind=PROPENSITY_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, + ) + + imputed_te_control: list[np.ndarray] = [] + imputed_te_treatment: list[np.ndarray] = [] + for treatment_variant in range(1, self.n_variants): + tv_imputed_te_control, tv_imputed_te_treatment = self._pseudo_outcome( + X, y, w, treatment_variant + ) + imputed_te_control.append(tv_imputed_te_control) + imputed_te_treatment.append(tv_imputed_te_treatment) + + te_treatment_evaluation = _evaluate_model( + self._treatment_models[TREATMENT_EFFECT_MODEL], + X=[X[w == tv] for tv in range(1, self.n_variants)], + y=imputed_te_treatment, + scorers=scoring.get( + TREATMENT_EFFECT_MODEL, ["neg_root_mean_squared_error"] + ), + model_kind=TREATMENT_EFFECT_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=True, + ) + + te_control_evaluation = _evaluate_model( + self._treatment_models[CONTROL_EFFECT_MODEL], + X=[X[w == 0] for _ in range(1, self.n_variants)], + y=imputed_te_control, + scorers=scoring.get(CONTROL_EFFECT_MODEL, ["neg_root_mean_squared_error"]), + model_kind=CONTROL_EFFECT_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=True, + ) + + return ( + variant_outcome_evaluation + | propensity_evaluation + | te_treatment_evaluation + | te_control_evaluation ) def _pseudo_outcome( diff --git a/tests/test_learner.py b/tests/test_learner.py index 30dce74..2afe3c6 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -6,7 +6,7 @@ import pytest from lightgbm import LGBMClassifier, LGBMRegressor from sklearn.linear_model import LinearRegression, LogisticRegression -from sklearn.metrics import root_mean_squared_error +from sklearn.metrics import make_scorer, root_mean_squared_error from sklearn.model_selection import train_test_split from metalearners.cross_fit_estimator import _OOS_WHITELIST @@ -310,11 +310,12 @@ def test_learner_twins(metalearner, reference_value, twins_data, rng): assert rmse < reference_value * (1 + _OOS_REFERENCE_VALUE_TOLERANCE) -@pytest.mark.parametrize("metalearner", ["S", "T", "R"]) +@pytest.mark.parametrize("metalearner", ["S", "T", "X", "R", "DR"]) @pytest.mark.parametrize("n_classes", [2, 5, 10]) @pytest.mark.parametrize("n_variants", [2, 5]) @pytest.mark.parametrize("is_classification", [True, False]) -def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_variants): +@pytest.mark.parametrize("is_oos", [True, False]) +def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_variants, is_oos): sample_size = 1000 factory = metalearner_factory(metalearner) if n_variants > 2 and not factory._supports_multi_treatment(): @@ -323,12 
+324,17 @@ def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_vari pytest.skip() # skip repeated tests if is_classification and n_classes > 2 and not factory._supports_multi_class(): pytest.skip() + test_size = 250 X = rng.standard_normal((sample_size, 10)) + X_test = rng.standard_normal((test_size, 10)) if is_oos else X w = rng.integers(0, n_variants, size=sample_size) + w_test = rng.integers(0, n_variants, test_size) if is_oos else w if is_classification: y = rng.integers(0, n_classes, size=sample_size) + y_test = rng.integers(0, n_classes, test_size) if is_oos else y else: y = rng.standard_normal(sample_size) + y_test = rng.standard_normal(test_size) if is_oos else y base_learner = _linear_base_learner(is_classification) @@ -341,28 +347,163 @@ def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_vari n_folds=2, ) learner.fit(X=X, y=y, w=w) - evaluation = learner.evaluate(X=X, y=y, w=w, is_oos=False) + evaluation = learner.evaluate(X=X_test, y=y_test, w=w_test, is_oos=is_oos) if is_classification: if metalearner == "S": - assert "cross_entropy" in evaluation - elif metalearner == "T": + assert set(evaluation.keys()) == {"base_model_neg_log_loss"} + elif metalearner in ["T", "X", "DR"]: for v in range(n_variants): - assert f"variant_{v}_cross_entropy" in evaluation + assert f"variant_outcome_model_{v}_neg_log_loss" in evaluation elif metalearner == "R": - assert "outcome_log_loss" in evaluation + assert "outcome_model_neg_log_loss" in evaluation else: if metalearner == "S": - assert "rmse" in evaluation - elif metalearner == "T": + assert set(evaluation.keys()) == {"base_model_neg_root_mean_squared_error"} + elif metalearner in ["T", "X", "DR"]: for v in range(n_variants): - assert f"variant_{v}_rmse" in evaluation + assert ( + f"variant_outcome_model_{v}_neg_root_mean_squared_error" + in evaluation + ) elif metalearner == "R": - assert "outcome_rmse" in evaluation + assert "outcome_model_neg_root_mean_squared_error" in evaluation if metalearner == "R": assert ( {f"r_loss_{i}_vs_0" for i in range(1, n_variants)} - | {"propensity_cross_entropy"} + | {"propensity_model_neg_log_loss"} ) <= set(evaluation.keys()) + elif metalearner == "X": + assert "propensity_model_neg_log_loss" in evaluation + for v in range(1, n_variants): + assert ( + f"treatment_effect_model_{v}_vs_0_neg_root_mean_squared_error" + in evaluation + ) + assert ( + f"control_effect_model_{v}_vs_0_neg_root_mean_squared_error" + in evaluation + ) + elif metalearner == "DR": + assert "propensity_model_neg_log_loss" in evaluation + for v in range(1, n_variants): + assert f"treatment_model_{v}_vs_0_neg_root_mean_squared_error" in evaluation + + +def new_score(estimator, X, y): + # This score doesn't make sense. + return np.mean(y - estimator.predict(X)) + + +def new_score_2(y, y_pred): + # This score doesn't make sense. 
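+    # It merely exercises the custom-callable scorer path of MetaLearner.evaluate.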
+ return np.mean(y - y_pred) + + +@pytest.mark.parametrize( + "metalearner, is_classification, scoring, expected_keys", + [ + ("S", True, {"base_model": ["accuracy"]}, {"base_model_accuracy"}), + ("S", False, {"base_model": ["max_error"]}, {"base_model_max_error"}), + ( + "T", + False, + {"variant_outcome_model": [new_score, make_scorer(new_score_2)]}, + { + "variant_outcome_model_0_custom_scorer_0", + "variant_outcome_model_0_custom_scorer_1", + "variant_outcome_model_1_custom_scorer_0", + "variant_outcome_model_1_custom_scorer_1", + "variant_outcome_model_2_custom_scorer_0", + "variant_outcome_model_2_custom_scorer_1", + }, + ), + ( + "X", + True, + { + "variant_outcome_model": ["f1"], + "propensity_model": [], + "control_effect_model": [], + "treatment_effect_model": ["r2", new_score], + }, + { + "variant_outcome_model_0_f1", + "variant_outcome_model_1_f1", + "variant_outcome_model_2_f1", + "treatment_effect_model_1_vs_0_r2", + "treatment_effect_model_1_vs_0_custom_scorer_1", + "treatment_effect_model_2_vs_0_r2", + "treatment_effect_model_2_vs_0_custom_scorer_1", + }, + ), + ( + "R", + False, + { + "outcome_model": [make_scorer(new_score_2)], + "propensity_model": [], + }, + { + "outcome_model_custom_scorer_0", + "r_loss_1_vs_0", + "r_loss_2_vs_0", + }, + ), + ( + "DR", + True, + { + "variant_outcome_model": ["f1"], + "propensity_model": [], + "treatment_model": ["r2", new_score], + }, + { + "variant_outcome_model_0_f1", + "variant_outcome_model_1_f1", + "variant_outcome_model_2_f1", + "treatment_model_1_vs_0_r2", + "treatment_model_1_vs_0_custom_scorer_1", + "treatment_model_2_vs_0_r2", + "treatment_model_2_vs_0_custom_scorer_1", + }, + ), + ], +) +@pytest.mark.parametrize("is_oos", [True, False]) +def test_learner_evaluate_scoring( + metalearner, is_classification, scoring, expected_keys, is_oos, rng +): + factory = metalearner_factory(metalearner) + nuisance_model_factory = _linear_base_learner(is_classification) + nuisance_model_params = _linear_base_learner_params(is_classification) + + n_variants = 3 + sample_size = 1000 + test_size = 250 + X = rng.standard_normal((sample_size, 10)) + X_test = rng.standard_normal((test_size, 10)) if is_oos else X + w = rng.integers(0, n_variants, size=sample_size) + w_test = rng.integers(0, n_variants, test_size) if is_oos else w + if is_classification: + y = rng.integers(0, 2, size=sample_size) + y_test = rng.integers(0, 2, test_size) if is_oos else y + else: + y = rng.standard_normal(sample_size) + y_test = rng.standard_normal(test_size) if is_oos else y + + ml = factory( + is_classification=is_classification, + n_variants=n_variants, + nuisance_model_factory=nuisance_model_factory, + propensity_model_factory=LGBMClassifier, + treatment_model_factory=LinearRegression, + nuisance_model_params=nuisance_model_params, + propensity_model_params={"n_estimators": 1}, + n_folds=2, + ) + ml.fit(X, y, w) + evaluation = ml.evaluate(X_test, y_test, w_test, is_oos, scoring=scoring) + assert set(evaluation.keys()) == expected_keys @pytest.mark.parametrize("outcome_kind", ["binary", "continuous"]) From ad71c6645a8d0d1f9a2660af9df356ac2125f692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 14:49:41 +0200 Subject: [PATCH 06/59] run pchs --- tests/test_learner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_learner.py b/tests/test_learner.py index 2afe3c6..404fbeb 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -315,7 +315,9 @@ def 
test_learner_twins(metalearner, reference_value, twins_data, rng): @pytest.mark.parametrize("n_variants", [2, 5]) @pytest.mark.parametrize("is_classification", [True, False]) @pytest.mark.parametrize("is_oos", [True, False]) -def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_variants, is_oos): +def test_learner_evaluate( + metalearner, is_classification, rng, n_classes, n_variants, is_oos +): sample_size = 1000 factory = metalearner_factory(metalearner) if n_variants > 2 and not factory._supports_multi_treatment(): From 1c391935d93fd7e5953060670ef11d453be53716 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 14:57:38 +0200 Subject: [PATCH 07/59] Implement MetaLearnerGridSearchCV --- metalearners/_utils.py | 9 + metalearners/metalearner_grid_search_cv.py | 296 +++++++++++++++++++++ tests/test_metalearner_grid_search_cv.py | 114 ++++++++ 3 files changed, 419 insertions(+) create mode 100644 metalearners/metalearner_grid_search_cv.py create mode 100644 tests/test_metalearner_grid_search_cv.py diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 1d82b7b..d0804f1 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -32,6 +32,15 @@ def index_matrix(matrix: Matrix, rows: Vector) -> Matrix: return matrix[rows, :] +def index_vector(vector: Vector, rows: Vector) -> Vector: + """Subselect certain rows from a vector.""" + if isinstance(rows, pd.Series): + rows = rows.to_numpy() + if isinstance(vector, pd.Series): + return vector.iloc[rows] + return vector[rows] + + def are_pd_indices_equal(*args: pd.DataFrame | pd.Series) -> bool: if len(args) < 2: return True diff --git a/metalearners/metalearner_grid_search_cv.py b/metalearners/metalearner_grid_search_cv.py new file mode 100644 index 0000000..5dc59dd --- /dev/null +++ b/metalearners/metalearner_grid_search_cv.py @@ -0,0 +1,296 @@ +# # Copyright (c) QuantCo 2024-2024 +# # SPDX-License-Identifier: BSD-3-Clause + +import time +from collections.abc import Callable, Mapping, Sequence +from dataclasses import dataclass +from functools import reduce +from operator import add + +import pandas as pd +from joblib import Parallel, delayed +from sklearn.model_selection import KFold, ParameterGrid + +from metalearners._typing import Matrix, OosMethod, Vector, _ScikitModel +from metalearners._utils import index_matrix, index_vector +from metalearners.cross_fit_estimator import OVERALL +from metalearners.metalearner import PROPENSITY_MODEL, MetaLearner + + +@dataclass(frozen=True) +class _FitAndScoreJob: + metalearner: MetaLearner + X_train: Matrix + y_train: Vector + w_train: Vector + X_test: Matrix + y_test: Vector + w_test: Vector + oos_method: OosMethod + scoring: Mapping[str, list[str | Callable]] | None + kwargs: dict + cv_index: int + + +@dataclass(frozen=True) +class _CVResult: + r"""Cross Validation Result.""" + + metalearner: MetaLearner + train_scores: dict + test_scores: dict + fit_time: float + score_time: float + cv_index: int + + +def _fit_and_score(job: _FitAndScoreJob) -> _CVResult: + start_time = time.time() + job.metalearner.fit(job.X_train, job.y_train, job.w_train, **job.kwargs) + fit_time = time.time() - start_time + + train_scores = job.metalearner.evaluate( + X=job.X_train, + y=job.y_train, + w=job.w_train, + is_oos=False, + scoring=job.scoring, + ) + test_scores = job.metalearner.evaluate( + X=job.X_test, + y=job.y_test, + w=job.w_test, + is_oos=True, + oos_method=job.oos_method, + scoring=job.scoring, + ) + score_time = time.time() - 
start_time - fit_time
+    return _CVResult(
+        metalearner=job.metalearner,
+        fit_time=fit_time,
+        score_time=score_time,
+        train_scores=train_scores,
+        test_scores=test_scores,
+        cv_index=job.cv_index,
+    )
+
+
+def _format_results(results: Sequence[_CVResult]):
+    rows = []
+    for result in results:
+        row: dict[str, str | int | float] = {}
+        row["metalearner"] = result.metalearner.__class__.__name__
+        nuisance_models = set(result.metalearner.nuisance_model_specifications().keys())
+        treatment_models = set(
+            result.metalearner.treatment_model_specifications().keys()
+        )
+        for model_kind in nuisance_models:
+            row[model_kind] = result.metalearner.nuisance_model_factory[
+                model_kind
+            ].__name__
+            for param, value in result.metalearner.nuisance_model_params[
+                model_kind
+            ].items():
+                row[f"{model_kind}_{param}"] = value
+        for model_kind in treatment_models:
+            row[model_kind] = result.metalearner.treatment_model_factory[
+                model_kind
+            ].__name__
+            for param, value in result.metalearner.treatment_model_params[
+                model_kind
+            ].items():
+                row[f"{model_kind}_{param}"] = value
+        row["cv_index"] = result.cv_index
+        row["fit_time"] = result.fit_time
+        row["score_time"] = result.score_time
+        for name, value in result.train_scores.items():
+            row[f"train_{name}"] = value
+        for name, value in result.test_scores.items():
+            row[f"test_{name}"] = value
+        rows.append(row)
+    df = pd.DataFrame(rows)
+    return df
+
+
+class MetaLearnerGridSearchCV:
+    """Exhaustive search over specified parameter values for a MetaLearner.
+
+    ``metalearner_params`` should contain the necessary params for the MetaLearner initialization
+    such as ``n_variants`` and ``is_classification``. It can also contain optional parameters
+    that all MetaLearners should be initialized with such as ``n_folds`` or ``feature_set``.
+    Importantly, ``random_state`` must be passed through the ``random_state`` parameter
+    and not through ``metalearner_params``.
+
+    ``base_learner_grid`` keys should be the names of all the models contained in the MetaLearner
+    defined by ``metalearner_factory``; for information about these names, check
+    :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` and
+    :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`. The
+    values should be sequences of model factories.
+
+    ``param_grid`` should contain the parameter grid for each type of model used by the
+    base learners defined in ``base_learner_grid``. The keys should be strings with the
+    model class name. An example for optimizing over the :class:`metalearners.DRLearner`
+    would be:
+
+    .. code-block:: python
+
+        base_learner_grid = {
+            "propensity_model": (LGBMClassifier, LogisticRegression),
+            "variant_outcome_model": (LGBMRegressor, LinearRegression),
+            "treatment_model": (LGBMRegressor,)
+        }
+
+        param_grid = {
+            "LGBMRegressor": {"n_estimators": [1, 2], "verbose": [-1]},
+            "LGBMClassifier": {
+                "n_estimators": [1, 2, 3],
+                "verbose": [-1],
+            },
+        }
+
+    If some model is not present in ``param_grid``, the default parameters will be used.
+
+    For how to define ``scoring`` check :meth:`~metalearners.metalearner.MetaLearner.evaluate`.
+
+    ``verbose`` will be passed to `joblib.Parallel `_.
+
+    Check TODO to see an example of the usage of this class.
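+
+    A minimal usage sketch, reusing the hypothetical ``base_learner_grid`` and
+    ``param_grid`` from the example above (``X``, ``y`` and ``w`` are assumed to be
+    pre-existing covariates, outcomes and treatment assignments):
+
+    .. code-block:: python
+
+        gs = MetaLearnerGridSearchCV(
+            metalearner_factory=DRLearner,
+            metalearner_params={"is_classification": False, "n_variants": 2},
+            base_learner_grid=base_learner_grid,
+            param_grid=param_grid,
+            cv=3,
+        )
+        gs.fit(X, y, w)
+        gs.cv_results_  # one row per (base learner configuration, fold) pair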
+ """ + + def __init__( + self, + metalearner_factory: type[MetaLearner], + metalearner_params: Mapping, + base_learner_grid: Mapping[str, Sequence[type[_ScikitModel]]], + param_grid: Mapping[str, Mapping[str, Sequence]], + scoring: Mapping[str, list[str | Callable]] | None = None, + cv: int = 5, + n_jobs: int | None = None, + random_state: int | None = None, + verbose: int = 0, + ): + self.metalearner_factory = metalearner_factory + self.metalearner_params = metalearner_params + self.scoring = scoring + self.cv = cv + self.n_jobs = n_jobs + self.random_state = random_state + self.verbose = verbose + + self.raw_results_: Sequence[_CVResult] | None = None + self.cv_results_: pd.DataFrame | None = None + + expected_base_models = set( + metalearner_factory.nuisance_model_specifications().keys() + ) | set(metalearner_factory.treatment_model_specifications().keys()) + + if set(base_learner_grid.keys()) != expected_base_models: + raise ValueError + + all_base_learners = set(reduce(add, base_learner_grid.values())) + param_grid_empty: Mapping[str, Mapping[str, Sequence]] = { + k.__name__: {} for k in all_base_learners if k.__name__ not in param_grid + } + self.base_learner_grid = list(ParameterGrid(base_learner_grid)) + + # Mapping does not have union "|" operator, see + # https://peps.python.org/pep-0584/#what-about-mapping-and-mutablemapping + full_param_grid = {**param_grid_empty, **param_grid} + self.base_learner_param_grids = { + base_learner: list(ParameterGrid(base_learner_param_grid)) + for base_learner, base_learner_param_grid in full_param_grid.items() + } + + def fit( + self, + X: Matrix, + y: Vector, + w: Vector, + oos_method: OosMethod = OVERALL, + **kwargs, + ): + """Run fit with all sets of parameters. + + ``kwargs`` will be passed through to the :meth:`~metalearners.metalearner.MetaLearner.fit` + call of each individual MetaLearner. 
+ """ + cv = KFold(n_splits=self.cv, shuffle=True, random_state=self.random_state) + + nuisance_models_no_propensity = set( + self.metalearner_factory.nuisance_model_specifications().keys() + ) - {PROPENSITY_MODEL} + treatment_models = set( + self.metalearner_factory.treatment_model_specifications().keys() + ) + + all_models = set( + self.metalearner_factory.nuisance_model_specifications().keys() + ) | set(self.metalearner_factory.treatment_model_specifications().keys()) + + jobs: list[_FitAndScoreJob] = [] + for cv_index, (train_indices, test_indices) in enumerate(cv.split(X)): + X_train = index_matrix(X, train_indices) + X_test = index_matrix(X, test_indices) + y_train = index_vector(y, train_indices) + y_test = index_vector(y, test_indices) + w_train = index_vector(w, train_indices) + w_test = index_vector(w, test_indices) + for base_learners in self.base_learner_grid: + nuisance_model_factory = { + model_kind: base_learners[model_kind] + for model_kind in nuisance_models_no_propensity + } + treatment_model_factory = { + model_kind: base_learners[model_kind] + for model_kind in treatment_models + } + propensity_model_factory = base_learners.get(PROPENSITY_MODEL, None) + + param_grid = { + model_kind: self.base_learner_param_grids[ + base_learners[model_kind].__name__ + ] + for model_kind in all_models + } + for params in ParameterGrid(param_grid): + nuisance_model_params = { + model_kind: params[model_kind] + for model_kind in nuisance_models_no_propensity + } + treatment_model_params = { + model_kind: params[model_kind] + for model_kind in treatment_models + } + propensity_model_params = params.get(PROPENSITY_MODEL, None) + + ml = self.metalearner_factory( + **self.metalearner_params, + nuisance_model_factory=nuisance_model_factory, + treatment_model_factory=treatment_model_factory, + propensity_model_factory=propensity_model_factory, + nuisance_model_params=nuisance_model_params, + treatment_model_params=treatment_model_params, + propensity_model_params=propensity_model_params, + random_state=self.random_state, + ) + + jobs.append( + _FitAndScoreJob( + metalearner=ml, + X_train=X_train, + y_train=y_train, + w_train=w_train, + X_test=X_test, + y_test=y_test, + w_test=w_test, + oos_method=oos_method, + scoring=self.scoring, + kwargs=kwargs, + cv_index=cv_index, + ) + ) + + parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose) + raw_results = parallel(delayed(_fit_and_score)(job) for job in jobs) + self.raw_results_ = raw_results + self.cv_results_ = _format_results(results=raw_results) diff --git a/tests/test_metalearner_grid_search_cv.py b/tests/test_metalearner_grid_search_cv.py new file mode 100644 index 0000000..f8d1638 --- /dev/null +++ b/tests/test_metalearner_grid_search_cv.py @@ -0,0 +1,114 @@ +# # Copyright (c) QuantCo 2024-2024 +# # SPDX-License-Identifier: BSD-3-Clause + + +import pytest +from lightgbm import LGBMClassifier, LGBMRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +from metalearners.drlearner import DRLearner +from metalearners.metalearner_grid_search_cv import MetaLearnerGridSearchCV +from metalearners.rlearner import RLearner +from metalearners.slearner import SLearner +from metalearners.tlearner import TLearner +from metalearners.xlearner import XLearner + + +@pytest.mark.parametrize( + "metalearner_factory, is_classification, base_learner_grid, param_grid, expected_n_configs", + [ + ( + SLearner, + False, + {"base_model": [LinearRegression, LGBMRegressor]}, + {"LGBMRegressor": {"n_estimators": [1, 2]}}, + 3, + ), + ( 
+ SLearner, + True, + {"base_model": [LogisticRegression, LGBMClassifier]}, + {"LGBMClassifier": {"n_estimators": [1, 2]}}, + 3, + ), + ( + TLearner, + False, + {"variant_outcome_model": [LinearRegression, LGBMRegressor]}, + {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}, + 4, + ), + ( + XLearner, + False, + { + "variant_outcome_model": [LinearRegression], + "propensity_model": [LGBMClassifier], + "control_effect_model": [LinearRegression], + "treatment_effect_model": [LinearRegression], + }, + {"LGBMClassifier": {"n_estimators": [1, 2, 3]}}, + 3, + ), + ( + RLearner, + False, + { + "outcome_model": [LinearRegression], + "propensity_model": [LGBMClassifier], + "treatment_model": [LGBMRegressor], + }, + { + "LGBMClassifier": {"n_estimators": [1, 2, 3]}, + "LGBMRegressor": {"n_estimators": [1, 2, 3]}, + }, + 9, + ), + ( + DRLearner, + False, + { + "variant_outcome_model": [LinearRegression], + "propensity_model": [LGBMClassifier], + "treatment_model": [LinearRegression], + }, + {"LGBMClassifier": {"n_estimators": [1, 2, 3, 4]}}, + 4, + ), + ], +) +@pytest.mark.parametrize("n_variants", [2, 5]) +@pytest.mark.parametrize("cv", [2, 3]) +def test_metalearnergridsearchcv_smoke( + metalearner_factory, + is_classification, + n_variants, + base_learner_grid, + param_grid, + cv, + rng, + expected_n_configs, +): + metalearner_params = { + "is_classification": is_classification, + "n_variants": n_variants, + "n_folds": 2, + } + gs = MetaLearnerGridSearchCV( + metalearner_factory=metalearner_factory, + metalearner_params=metalearner_params, + base_learner_grid=base_learner_grid, + param_grid=param_grid, + cv=cv, + ) + n_samples = 250 + X = rng.standard_normal((n_samples, 3)) + if is_classification: + y = rng.integers(0, 2, n_samples) + else: + y = rng.standard_normal(n_samples) + w = rng.integers(0, n_variants, n_samples) + + gs.fit(X, y, w) + assert gs.cv_results_ is not None + assert gs.cv_results_.shape[0] == expected_n_configs * cv From e0a92397ffcae22900a53700f517cf92b12071a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 15:13:27 +0200 Subject: [PATCH 08/59] Update CHANGELOG --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f5a59b2..2e7ee48 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,10 @@ Changelog 0.4.0 (2024-06-**) ------------------ +* Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and + implemented the abstract method for the :class:`metalearners.XLearner` and + :class:`metalearners.DRLearner`. + * Implemented :meth:`metalearners.cross_fit_estimator.CrossFitEstimator.clone`. * Added ``n_jobs_base_learners`` to :meth:`metalearners.metalearner.MetaLearner.fit`. From f0d6f6ce5c6d7c72b3dcba0b7f0b600801d77ff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 15:16:00 +0200 Subject: [PATCH 09/59] Update CHANGELOG --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2e7ee48..fa81c33 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,8 @@ Changelog 0.4.0 (2024-06-**) ------------------ +* Implemented :class:`metalearners.metalearner_grid_search_cv.MetaLearnerGridSearchCV`. + * Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and implemented the abstract method for the :class:`metalearners.XLearner` and :class:`metalearners.DRLearner`. 
From 476a4aedb0c82e7b42cf147d1365316aaddc82de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:07 +0200 Subject: [PATCH 10/59] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 22f53d1..3666689 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -135,7 +135,7 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: raise ValueError("Need at least two folds to use synchronization.") -def _evaluate_model( +def _evaluate_model_kind( cfes: Sequence[CrossFitEstimator], X: Sequence[Matrix], y: Sequence[Vector], From 1c4c060dcb2a3f54cbe89a22eeacc21c64d3769b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:13 +0200 Subject: [PATCH 11/59] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 3666689..091b1d4 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -137,7 +137,7 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: def _evaluate_model_kind( cfes: Sequence[CrossFitEstimator], - X: Sequence[Matrix], + Xs: Sequence[Matrix], y: Sequence[Vector], scorers: Sequence[str | Callable], model_kind: str, From 49f1556a7a7f5970aaadc1a6206021eca650d660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:19 +0200 Subject: [PATCH 12/59] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 091b1d4..513701c 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -138,7 +138,7 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: def _evaluate_model_kind( cfes: Sequence[CrossFitEstimator], Xs: Sequence[Matrix], - y: Sequence[Vector], + ys: Sequence[Vector], scorers: Sequence[str | Callable], model_kind: str, is_oos: bool, From d5280459812b138b893e271fcc4869047570076e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:31 +0200 Subject: [PATCH 13/59] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 513701c..069f7e9 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -151,7 +151,7 @@ def _evaluate_model_kind( for idx, scorer in enumerate(scorers): if isinstance(scorer, str): scorer_str = scorer - scorer_call: Callable = get_scorer(scorer) + scorer_callable: Callable = get_scorer(scorer) else: scorer_str = f"custom_scorer_{idx}" scorer_call = scorer From 
631505ebf5a6d851db82b7ca1137d5f15282ede0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:37 +0200 Subject: [PATCH 14/59] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 069f7e9..cc66c3d 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -150,7 +150,7 @@ def _evaluate_model_kind( evaluation_metrics: dict[str, float] = {} for idx, scorer in enumerate(scorers): if isinstance(scorer, str): - scorer_str = scorer + scorer_name = scorer scorer_callable: Callable = get_scorer(scorer) else: scorer_str = f"custom_scorer_{idx}" From e0e70fa49dc0a62aa4f66292fccc681662f3ecd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Mon, 24 Jun 2024 08:49:56 +0200 Subject: [PATCH 15/59] Fix naming --- metalearners/drlearner.py | 20 ++++++++++---------- metalearners/metalearner.py | 8 ++++---- metalearners/rlearner.py | 14 +++++++------- metalearners/slearner.py | 8 ++++---- metalearners/tlearner.py | 8 ++++---- metalearners/xlearner.py | 26 +++++++++++++------------- 6 files changed, 42 insertions(+), 42 deletions(-) diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 93b86a5..e50e3c6 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -25,7 +25,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, - _evaluate_model, + _evaluate_model_kind, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -221,10 +221,10 @@ def evaluate( masks = [] for tv in range(self.n_variants): masks.append(w == tv) - variant_outcome_evaluation = _evaluate_model( + variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], - X=[X[w == tv] for tv in range(self.n_variants)], - y=[y[w == tv] for tv in range(self.n_variants)], + Xs=[X[w == tv] for tv in range(self.n_variants)], + ys=[y[w == tv] for tv in range(self.n_variants)], scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, @@ -232,10 +232,10 @@ def evaluate( is_treatment=False, ) - propensity_evaluation = _evaluate_model( + propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], - X=[X], - y=[w], + Xs=[X], + ys=[w], scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), model_kind=PROPENSITY_MODEL, is_oos=is_oos, @@ -255,10 +255,10 @@ def evaluate( ) pseudo_outcome.append(tv_pseudo_outcome) - treatment_evaluation = _evaluate_model( + treatment_evaluation = _evaluate_model_kind( self._treatment_models[TREATMENT_MODEL], - X=[X for _ in range(1, self.n_variants)], - y=pseudo_outcome, + Xs=[X for _ in range(1, self.n_variants)], + ys=pseudo_outcome, scorers=scoring.get(TREATMENT_MODEL, ["neg_root_mean_squared_error"]), model_kind=TREATMENT_MODEL, is_oos=is_oos, diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index cc66c3d..596a5ad 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -153,8 +153,8 @@ def _evaluate_model_kind( scorer_name = scorer scorer_callable: Callable = get_scorer(scorer) else: - scorer_str = f"custom_scorer_{idx}" - scorer_call = scorer + scorer_name = f"custom_scorer_{idx}" + scorer_callable = 
scorer for i, cfe in enumerate(cfes): if is_treatment: treatment_variant = i + 1 @@ -164,9 +164,9 @@ def _evaluate_model_kind( index_str = "" else: index_str = f"{i}_" - name = f"{prefix}{index_str}{scorer_str}" + name = f"{prefix}{index_str}{scorer_name}" with _PredictContext(cfe, is_oos, oos_method) as modified_cfe: - evaluation_metrics[name] = scorer_call(modified_cfe, X[i], y[i]) + evaluation_metrics[name] = scorer_callable(modified_cfe, Xs[i], ys[i]) return evaluation_metrics diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index b1e3525..d017a38 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -27,7 +27,7 @@ TREATMENT, TREATMENT_MODEL, MetaLearner, - _evaluate_model, + _evaluate_model_kind, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -344,10 +344,10 @@ def evaluate( scoring = {} self._validate_scoring(scoring=scoring) - propensity_evaluation = _evaluate_model( + propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], - X=[X], - y=[w], + Xs=[X], + ys=[w], scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), model_kind=PROPENSITY_MODEL, is_oos=is_oos, @@ -358,10 +358,10 @@ def evaluate( default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" ) - outcome_evaluation = _evaluate_model( + outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[OUTCOME_MODEL], - X=[X], - y=[y], + Xs=[X], + ys=[y], scorers=scoring.get(OUTCOME_MODEL, [default_metric]), model_kind=OUTCOME_MODEL, is_oos=is_oos, diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 718464a..baa5d6a 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -18,7 +18,7 @@ from metalearners.metalearner import ( NUISANCE, MetaLearner, - _evaluate_model, + _evaluate_model_kind, _ModelSpecifications, ) @@ -168,10 +168,10 @@ def evaluate( X_with_w = _append_treatment_to_covariates( X, w, self._supports_categoricals, self.n_variants ) - return _evaluate_model( + return _evaluate_model_kind( cfes=self._nuisance_models[_BASE_MODEL], - X=[X_with_w], - y=[y], + Xs=[X_with_w], + ys=[y], scorers=scoring.get(_BASE_MODEL, [default_metric]), model_kind=_BASE_MODEL, is_oos=is_oos, diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 9a5b8d9..befcc7f 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -16,7 +16,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, - _evaluate_model, + _evaluate_model_kind, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -129,10 +129,10 @@ def evaluate( masks = [] for tv in range(self.n_variants): masks.append(w == tv) - return _evaluate_model( + return _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], - X=[X[w == tv] for tv in range(self.n_variants)], - y=[y[w == tv] for tv in range(self.n_variants)], + Xs=[X[w == tv] for tv in range(self.n_variants)], + ys=[y[w == tv] for tv in range(self.n_variants)], scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index d2b691d..434d1fd 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -23,7 +23,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, - _evaluate_model, + _evaluate_model_kind, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ 
-300,10 +300,10 @@ def evaluate( masks = [] for tv in range(self.n_variants): masks.append(w == tv) - variant_outcome_evaluation = _evaluate_model( + variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], - X=[X[w == tv] for tv in range(self.n_variants)], - y=[y[w == tv] for tv in range(self.n_variants)], + Xs=[X[w == tv] for tv in range(self.n_variants)], + ys=[y[w == tv] for tv in range(self.n_variants)], scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, @@ -311,10 +311,10 @@ def evaluate( is_treatment=False, ) - propensity_evaluation = _evaluate_model( + propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], - X=[X], - y=[w], + Xs=[X], + ys=[w], scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), model_kind=PROPENSITY_MODEL, is_oos=is_oos, @@ -331,10 +331,10 @@ def evaluate( imputed_te_control.append(tv_imputed_te_control) imputed_te_treatment.append(tv_imputed_te_treatment) - te_treatment_evaluation = _evaluate_model( + te_treatment_evaluation = _evaluate_model_kind( self._treatment_models[TREATMENT_EFFECT_MODEL], - X=[X[w == tv] for tv in range(1, self.n_variants)], - y=imputed_te_treatment, + Xs=[X[w == tv] for tv in range(1, self.n_variants)], + ys=imputed_te_treatment, scorers=scoring.get( TREATMENT_EFFECT_MODEL, ["neg_root_mean_squared_error"] ), @@ -344,10 +344,10 @@ def evaluate( is_treatment=True, ) - te_control_evaluation = _evaluate_model( + te_control_evaluation = _evaluate_model_kind( self._treatment_models[CONTROL_EFFECT_MODEL], - X=[X[w == 0] for _ in range(1, self.n_variants)], - y=imputed_te_control, + Xs=[X[w == 0] for _ in range(1, self.n_variants)], + ys=imputed_te_control, scorers=scoring.get(CONTROL_EFFECT_MODEL, ["neg_root_mean_squared_error"]), model_kind=CONTROL_EFFECT_MODEL, is_oos=is_oos, From e0cd5638cc0e81a24addf5b148bf060cc59f87fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:56:43 +0200 Subject: [PATCH 16/59] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 596a5ad..e1b6bab 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -873,7 +873,7 @@ def evaluate( oos_method: OosMethod = OVERALL, scoring: Mapping[str, list[str | Callable]] | None = None, ) -> dict[str, float]: - r"""Evaluate the models models contained in the MetaLearner. + r"""Evaluate the the MetaLearner. 
``scoring`` keys must be a subset of the names of the models contained in the
         MetaLearner, for information about this names check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications`
         and :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`.

From fc01491987cf0df4d33ea76dba6aa4cba5dba92a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?=
Date: Mon, 24 Jun 2024 08:57:07 +0200
Subject: [PATCH 17/59] Fix docs

---
 metalearners/metalearner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py
index e1b6bab..5be35cc 100644
--- a/metalearners/metalearner.py
+++ b/metalearners/metalearner.py
@@ -873,7 +873,7 @@ def evaluate(
         oos_method: OosMethod = OVERALL,
         scoring: Mapping[str, list[str | Callable]] | None = None,
     ) -> dict[str, float]:
-        r"""Evaluate the the MetaLearner.
+        r"""Evaluate the MetaLearner.
 
         ``scoring`` keys must be a subset of the names of the models contained in the
         MetaLearner, for information about this names check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications`
         and :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`.

From 01501067b288760b5db9b2857699671541a54a61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?=
Date: Mon, 24 Jun 2024 09:13:10 +0200
Subject: [PATCH 18/59] Don't force subset

---
 metalearners/drlearner.py   |  1 -
 metalearners/metalearner.py | 17 ++++------------
 metalearners/rlearner.py    |  1 -
 metalearners/slearner.py    |  1 -
 metalearners/tlearner.py    |  1 -
 metalearners/xlearner.py    |  1 -
 6 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py
index e50e3c6..8d802e3 100644
--- a/metalearners/drlearner.py
+++ b/metalearners/drlearner.py
@@ -213,7 +213,6 @@ def evaluate(
     ) -> dict[str, float]:
         if scoring is None:
             scoring = {}
-        self._validate_scoring(scoring=scoring)
 
         default_metric = (
             "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error"
diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py
index 5be35cc..76a3716 100644
--- a/metalearners/metalearner.py
+++ b/metalearners/metalearner.py
@@ -348,16 +348,6 @@ def _validate_models(self) -> None:
                 factory, predict_method, name=f"treatment model {model_kind}"
             )
 
-    @classmethod
-    def _validate_scoring(cls, scoring: Mapping[str, list[str | Callable]]):
-        if not set(scoring.keys()) <= (
-            set(cls.nuisance_model_specifications().keys())
-            | set(cls.treatment_model_specifications().keys())
-        ):
-            raise ValueError(
-                "scoring dict keys need to be a subset of the model names in the MetaLearner"
-            )
-
     def _qualified_fit_params(
         self,
         fit_params: None | dict,
@@ -875,9 +865,10 @@ def evaluate(
     ) -> dict[str, float]:
         r"""Evaluate the MetaLearner.
 
-        ``scoring`` keys must be a subset of the names of the models contained in the
-        MetaLearner, for information about this names check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications`
-        and :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`.
+        The keys in ``scoring`` that are not the name of a model contained in the MetaLearner
+        will be ignored. For information about these names, check
+        :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` and
+        :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`.
 
         The values must be a list of:
 
         * ``string`` representing a ``sklearn`` scoring method. 
Check diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index d017a38..b4824ba 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -342,7 +342,6 @@ def evaluate( parameter is ignored.""" if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], diff --git a/metalearners/slearner.py b/metalearners/slearner.py index baa5d6a..a81b211 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -159,7 +159,6 @@ def evaluate( ) -> dict[str, float]: if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index befcc7f..a82f3da 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -120,7 +120,6 @@ def evaluate( ) -> dict[str, float]: if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 434d1fd..8103ed9 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -292,7 +292,6 @@ def evaluate( ) -> dict[str, float]: if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" From 6b595bdc60f934813fa485c953a3ae58d907c0b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Mon, 24 Jun 2024 09:14:50 +0200 Subject: [PATCH 19/59] Add test to ignore --- tests/test_learner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_learner.py b/tests/test_learner.py index 404fbeb..ca2823c 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -409,7 +409,10 @@ def new_score_2(y, y_pred): ( "T", False, - {"variant_outcome_model": [new_score, make_scorer(new_score_2)]}, + { + "variant_outcome_model": [new_score, make_scorer(new_score_2)], + "to_ignore": [], + }, { "variant_outcome_model_0_custom_scorer_0", "variant_outcome_model_0_custom_scorer_1", From 19f895c493bbfc8baa2defdfc9d49bffdce0271a Mon Sep 17 00:00:00 2001 From: Kevin Klein <7267523+kklein@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:37:45 +0200 Subject: [PATCH 20/59] Centralize generation of default scoring (#22) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Centralize generation of default scoring. * Reuse more type hints. * Update metalearners/metalearner.py Co-authored-by: Francesc Martí Escofet <154450563+FrancescMartiEscofetQC@users.noreply.github.com> * Update metalearners/metalearner.py Co-authored-by: Francesc Martí Escofet <154450563+FrancescMartiEscofetQC@users.noreply.github.com> * Apply pchs. 
--------- Co-authored-by: Francesc Martí Escofet <154450563+FrancescMartiEscofetQC@users.noreply.github.com> --- metalearners/_typing.py | 7 +++++-- metalearners/_utils.py | 6 ++++++ metalearners/drlearner.py | 17 ++++++----------- metalearners/metalearner.py | 23 +++++++++++++++++++++++ metalearners/rlearner.py | 15 +++++---------- metalearners/slearner.py | 14 ++++---------- metalearners/tlearner.py | 16 +++++----------- metalearners/xlearner.py | 22 ++++++++-------------- 8 files changed, 62 insertions(+), 58 deletions(-) diff --git a/metalearners/_typing.py b/metalearners/_typing.py index a7f39d4..b937317 100644 --- a/metalearners/_typing.py +++ b/metalearners/_typing.py @@ -1,7 +1,7 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Collection, Mapping +from collections.abc import Callable, Collection, Mapping, Sequence from typing import Literal, Protocol, Union import numpy as np @@ -29,7 +29,6 @@ class _ScikitModel(Protocol): # https://stackoverflow.com/questions/54868698/what-type-is-a-sklearn-model/60542986#60542986 def fit(self, X, y, *params, **kwargs): ... - def predict(self, X, *params, **kwargs): ... def score(self, X, y, **kwargs): ... @@ -44,3 +43,7 @@ def set_params(self, **params): ... # For instance, if converting the Generator resulting from a call to # sklearn.model_selection.KFold.split to a list we obtain this type. SplitIndices = list[tuple[np.ndarray, np.ndarray]] + +Scorer = str | Callable +Scorers = Sequence[Scorer] +Scoring = Mapping[str, Scorers] diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 1d82b7b..095c2f8 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -480,3 +480,9 @@ def _decorator(func): return func return _decorator + + +def default_metric(predict_method: PredictMethod) -> str: + if predict_method == _PREDICT_PROBA: + return "neg_log_loss" + return "neg_root_mean_squared_error" diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 8d802e3..ca5dd1a 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -1,13 +1,12 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Callable, Mapping import numpy as np from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import ( clip_element_absolute_value_to_epsilon, get_one, @@ -209,14 +208,10 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: - if scoring is None: - scoring = {} + safe_scoring = self._scoring(scoring) - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) masks = [] for tv in range(self.n_variants): masks.append(w == tv) @@ -224,7 +219,7 @@ def evaluate( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], ys=[y[w == tv] for tv in range(self.n_variants)], - scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + scorers=safe_scoring[VARIANT_OUTCOME_MODEL], model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -235,7 +230,7 @@ def evaluate( cfes=self._nuisance_models[PROPENSITY_MODEL], Xs=[X], ys=[w], - scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + 
scorers=safe_scoring[PROPENSITY_MODEL], model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -258,7 +253,7 @@ def evaluate( self._treatment_models[TREATMENT_MODEL], Xs=[X for _ in range(1, self.n_variants)], ys=pseudo_outcome, - scorers=scoring.get(TREATMENT_MODEL, ["neg_root_mean_squared_error"]), + scorers=safe_scoring[TREATMENT_MODEL], model_kind=TREATMENT_MODEL, is_oos=is_oos, oos_method=oos_method, diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 914b62b..2fcdcdf 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -21,11 +21,13 @@ OosMethod, Params, PredictMethod, + Scoring, SplitIndices, Vector, _ScikitModel, ) from metalearners._utils import ( + default_metric, index_matrix, validate_model_and_predict_method, validate_number_positive, @@ -1025,6 +1027,27 @@ def shap_values( shap_explainer_params=shap_explainer_params, ) + def _scoring(self, scoring: Scoring | None) -> Scoring: + + def _default_scoring() -> Scoring: + return { + nuisance_model: [ + default_metric(model_specifications["predict_method"](self)) + ] + for nuisance_model, model_specifications in self.nuisance_model_specifications().items() + } | { + treatment_model: [ + default_metric(model_specifications["predict_method"](self)) + ] + for treatment_model, model_specifications in self.treatment_model_specifications().items() + } + + default_scoring = _default_scoring() + + if scoring is None: + return default_scoring + return dict(default_scoring) | dict(scoring) + class _ConditionalAverageOutcomeMetaLearner(MetaLearner, ABC): diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index b4824ba..49258bd 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -1,14 +1,13 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Callable, Mapping import numpy as np from joblib import Parallel, delayed from sklearn.metrics import root_mean_squared_error from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import ( clip_element_absolute_value_to_epsilon, copydoc, @@ -335,33 +334,29 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: """In the RLearner case, the ``"treatment_model"`` is always evaluated with the :func:`~metalearners.rlearner.r_loss` and the ``scoring["treatment_model"]`` parameter is ignored.""" - if scoring is None: - scoring = {} + safe_scoring = self._scoring(scoring) propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], Xs=[X], ys=[w], - scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + scorers=safe_scoring[PROPENSITY_MODEL], model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, is_treatment=False, ) - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[OUTCOME_MODEL], Xs=[X], ys=[y], - scorers=scoring.get(OUTCOME_MODEL, [default_metric]), + scorers=safe_scoring[OUTCOME_MODEL], model_kind=OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 0d50b12..aacc93b 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -2,13 +2,12 @@ 
# # SPDX-License-Identifier: BSD-3-Clause import warnings -from collections.abc import Callable, Mapping import numpy as np import pandas as pd from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import ( convert_treatment, get_one, @@ -158,14 +157,9 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: - if scoring is None: - scoring = {} - - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) + safe_scoring = self._scoring(scoring) X_with_w = _append_treatment_to_covariates( X, w, self._supports_categoricals, self.n_variants @@ -174,7 +168,7 @@ def evaluate( cfes=self._nuisance_models[_BASE_MODEL], Xs=[X_with_w], ys=[y], - scorers=scoring.get(_BASE_MODEL, [default_metric]), + scorers=safe_scoring[_BASE_MODEL], model_kind=_BASE_MODEL, is_oos=is_oos, oos_method=oos_method, diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index a82f3da..24275fc 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -2,13 +2,11 @@ # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Callable, Mapping - import numpy as np from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import index_matrix from metalearners.cross_fit_estimator import OVERALL from metalearners.metalearner import ( @@ -116,23 +114,19 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: - if scoring is None: - scoring = {} - - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) + safe_scoring = self._scoring(scoring) masks = [] for tv in range(self.n_variants): masks.append(w == tv) + return _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], ys=[y[w == tv] for tv in range(self.n_variants)], - scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + scorers=safe_scoring[VARIANT_OUTCOME_MODEL], model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 8103ed9..611bc87 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -1,13 +1,12 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Callable, Mapping import numpy as np from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import ( get_one, get_predict, @@ -288,22 +287,19 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: - if scoring is None: - scoring = {} + safe_scoring = self._scoring(scoring) - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) masks = [] for tv in 
range(self.n_variants): masks.append(w == tv) + variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], ys=[y[w == tv] for tv in range(self.n_variants)], - scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + scorers=safe_scoring[VARIANT_OUTCOME_MODEL], model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -314,7 +310,7 @@ def evaluate( cfes=self._nuisance_models[PROPENSITY_MODEL], Xs=[X], ys=[w], - scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + scorers=safe_scoring[PROPENSITY_MODEL], model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -334,9 +330,7 @@ def evaluate( self._treatment_models[TREATMENT_EFFECT_MODEL], Xs=[X[w == tv] for tv in range(1, self.n_variants)], ys=imputed_te_treatment, - scorers=scoring.get( - TREATMENT_EFFECT_MODEL, ["neg_root_mean_squared_error"] - ), + scorers=safe_scoring[TREATMENT_EFFECT_MODEL], model_kind=TREATMENT_EFFECT_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -347,7 +341,7 @@ def evaluate( self._treatment_models[CONTROL_EFFECT_MODEL], Xs=[X[w == 0] for _ in range(1, self.n_variants)], ys=imputed_te_control, - scorers=scoring.get(CONTROL_EFFECT_MODEL, ["neg_root_mean_squared_error"]), + scorers=safe_scoring[CONTROL_EFFECT_MODEL], model_kind=CONTROL_EFFECT_MODEL, is_oos=is_oos, oos_method=oos_method, From 12d41b5f389c989c74ac7793522262ba93510406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:27:58 +0200 Subject: [PATCH 21/59] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 2fcdcdf..ea5dcc5 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -879,7 +879,7 @@ def evaluate( for the possible values. * ``Callable`` with signature ``scorer(estimator, X, y_true, **kwargs)``. We recommend using `sklearn.metrics.make_scorer `_ - to create this callables. + to create such a ``Callable``. 
If some model name is not present in the keys of ``scoring`` then the default used metrics will be ``neg_log_loss`` if it is a classifier and ``neg_root_mean_squared_error`` From 4a36e2513cbb23c55dd66de5d1ca0277cf958d1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:29:08 +0200 Subject: [PATCH 22/59] Update metalearners/tlearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/tlearner.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 24275fc..875a986 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -118,9 +118,6 @@ def evaluate( ) -> dict[str, float]: safe_scoring = self._scoring(scoring) - masks = [] - for tv in range(self.n_variants): - masks.append(w == tv) return _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], From 5f0987f1492762a3dfde0e6d1613f110bb618fbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:29:27 +0200 Subject: [PATCH 23/59] Update metalearners/xlearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/xlearner.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 611bc87..319a7ad 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -291,10 +291,6 @@ def evaluate( ) -> dict[str, float]: safe_scoring = self._scoring(scoring) - masks = [] - for tv in range(self.n_variants): - masks.append(w == tv) - variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], From d76dc7447ff3d20e9b473ae40e53385b4c30a8ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:31:39 +0200 Subject: [PATCH 24/59] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index ea5dcc5..6c1604a 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -144,7 +144,7 @@ def _evaluate_model_kind( scorers: Sequence[str | Callable], model_kind: str, is_oos: bool, - is_treatment: bool, + is_treatment_model: bool, oos_method: OosMethod = OVERALL, ) -> dict[str, float]: """Helper function to evaluate all the models of the same model kind.""" From 05787f9d1652d34b9bd32bd9d16646bb547f143b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:32:32 +0200 Subject: [PATCH 25/59] Rename --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 6c1604a..42c4377 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -158,7 +158,7 @@ def _evaluate_model_kind( scorer_name = f"custom_scorer_{idx}" scorer_callable = scorer for i, cfe in enumerate(cfes): - if is_treatment: + if is_treatment_model: treatment_variant = i + 1 index_str = f"{treatment_variant}_vs_0_" else: From 
dc946dc0ee042ea1805b1f6d1c4fbb199d6cd33a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:35:05 +0200 Subject: [PATCH 26/59] Rename --- metalearners/drlearner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index ca5dd1a..40e8a09 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -223,7 +223,7 @@ def evaluate( model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) propensity_evaluation = _evaluate_model_kind( @@ -234,7 +234,7 @@ def evaluate( model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) pseudo_outcome: list[np.ndarray] = [] @@ -257,7 +257,7 @@ def evaluate( model_kind=TREATMENT_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=True, + is_treatment_model=True, ) return variant_outcome_evaluation | propensity_evaluation | treatment_evaluation From ba895a350f00c66670aeb86030519cb609326f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:35:38 +0200 Subject: [PATCH 27/59] Rename --- metalearners/rlearner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index 49258bd..90b4e1f 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -349,7 +349,7 @@ def evaluate( model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) outcome_evaluation = _evaluate_model_kind( @@ -360,7 +360,7 @@ def evaluate( model_kind=OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) # TODO: improve this? generalize it to other metalearners? 
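A note on the ``is_treatment`` to ``is_treatment_model`` renames in this stretch of the series: the flag only steers how ``_evaluate_model_kind`` names the metrics it returns. The following stand-alone sketch reconstructs that naming scheme for illustration; the ``metric_name`` helper is hypothetical, and the single-model branch is assumed to key off the number of cross-fit estimators:

    def metric_name(
        model_kind: str, n_models: int, i: int, scorer_name: str, is_treatment_model: bool
    ) -> str:
        # Treatment models are reported per variant against control ("<tv>_vs_0_"),
        # multiple nuisance models get a positional index, a single model gets none.
        if is_treatment_model:
            index_str = f"{i + 1}_vs_0_"
        elif n_models == 1:
            index_str = ""
        else:
            index_str = f"{i}_"
        return f"{model_kind}_{index_str}{scorer_name}"

    # Keys as they surface in evaluate() results:
    assert metric_name("propensity_model", 1, 0, "neg_log_loss", False) == (
        "propensity_model_neg_log_loss"
    )
    assert metric_name("variant_outcome_model", 3, 2, "neg_root_mean_squared_error", False) == (
        "variant_outcome_model_2_neg_root_mean_squared_error"
    )
    assert metric_name("treatment_model", 2, 1, "neg_root_mean_squared_error", True) == (
        "treatment_model_2_vs_0_neg_root_mean_squared_error"
    )

The scorer name itself is either the ``sklearn`` scorer string or ``custom_scorer_<idx>`` for callables, matching the keys exercised in ``tests/test_learner.py``.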
From e81d152172c3a1d92824f83ea4467f3defc937ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:35:58 +0200 Subject: [PATCH 28/59] Rename --- metalearners/slearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/slearner.py b/metalearners/slearner.py index aacc93b..4e3e2b7 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -172,7 +172,7 @@ def evaluate( model_kind=_BASE_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) def predict_conditional_average_outcomes( From 9d2bbb9b1575ca99a2c05fce053ca989c2c75393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:36:17 +0200 Subject: [PATCH 29/59] Rename --- metalearners/tlearner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 875a986..11dacaa 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -118,7 +118,6 @@ def evaluate( ) -> dict[str, float]: safe_scoring = self._scoring(scoring) - return _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], @@ -127,5 +126,5 @@ def evaluate( model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) From c4de4f17b26b021860aa7fc62f77577a58c67272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:36:47 +0200 Subject: [PATCH 30/59] Rename --- metalearners/xlearner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 319a7ad..839ba57 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -299,7 +299,7 @@ def evaluate( model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) propensity_evaluation = _evaluate_model_kind( @@ -310,7 +310,7 @@ def evaluate( model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) imputed_te_control: list[np.ndarray] = [] @@ -330,7 +330,7 @@ def evaluate( model_kind=TREATMENT_EFFECT_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=True, + is_treatment_model=True, ) te_control_evaluation = _evaluate_model_kind( @@ -341,7 +341,7 @@ def evaluate( model_kind=CONTROL_EFFECT_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=True, + is_treatment_model=True, ) return ( From 7fa87940285151fc88b4fd4def4dced461d5b14d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:40:05 +0200 Subject: [PATCH 31/59] Update metalearners/drlearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/drlearner.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 40e8a09..1df7291 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -212,9 +212,6 @@ def evaluate( ) -> dict[str, float]: safe_scoring = self._scoring(scoring) - masks = [] - for tv in range(self.n_variants): - masks.append(w == 
tv) variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], From 8691a02f65484980c76cbe64e1a2c6537e4c5199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:40:12 +0200 Subject: [PATCH 32/59] Update metalearners/_utils.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 095c2f8..cf9fde1 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -468,7 +468,7 @@ def simplify_output_2d(tensor: np.ndarray) -> np.ndarray: # Taken from https://stackoverflow.com/questions/13741998/is-there-a-way-to-let-classes-inherit-the-documentation-of-their-superclass-with def copydoc(fromfunc, sep="\n"): """ - Decorator: Copy the docstring of `fromfunc` + Decorator: Copy the docstring of ``fromfunc`` """ def _decorator(func): From 99f4d4a4a2c7629c928ed1f13ab9eb0983a5d275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Tue, 25 Jun 2024 11:58:57 +0200 Subject: [PATCH 33/59] Fix license --- metalearners/metalearner_grid_search_cv.py | 4 ++-- tests/test_metalearner_grid_search_cv.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/metalearners/metalearner_grid_search_cv.py b/metalearners/metalearner_grid_search_cv.py index 5dc59dd..aa32294 100644 --- a/metalearners/metalearner_grid_search_cv.py +++ b/metalearners/metalearner_grid_search_cv.py @@ -1,5 +1,5 @@ -# # Copyright (c) QuantCo 2024-2024 -# # SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) QuantCo 2024-2024 +# SPDX-License-Identifier: BSD-3-Clause import time from collections.abc import Callable, Mapping, Sequence diff --git a/tests/test_metalearner_grid_search_cv.py b/tests/test_metalearner_grid_search_cv.py index f8d1638..e992ce5 100644 --- a/tests/test_metalearner_grid_search_cv.py +++ b/tests/test_metalearner_grid_search_cv.py @@ -1,5 +1,5 @@ -# # Copyright (c) QuantCo 2024-2024 -# # SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) QuantCo 2024-2024 +# SPDX-License-Identifier: BSD-3-Clause import pytest From d38e9d52dad37322b4eef9fac03a61927ba2f666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Tue, 25 Jun 2024 17:08:56 +0200 Subject: [PATCH 34/59] Update CHANGELOG --- CHANGELOG.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 42ec926..4526f78 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,13 @@ Changelog ========= +0.6.0 (2024-06-**) +------------------ + +* Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and + implemented the abstract method for the :class:`metalearners.XLearner` and + :class:`metalearners.DRLearner`. + 0.5.0 (2024-06-18) ------------------ @@ -30,10 +37,6 @@ Changelog 0.4.0 (2024-06-18) ------------------ -* Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and - implemented the abstract method for the :class:`metalearners.XLearner` and - :class:`metalearners.DRLearner`. - * Implemented :meth:`metalearners.cross_fit_estimator.CrossFitEstimator.clone`. * Added ``n_jobs_base_learners`` to :meth:`metalearners.metalearner.MetaLearner.fit`. 
From c20ae75321c972763c177bf5051f276336eb8924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Wed, 26 Jun 2024 17:12:55 +0200 Subject: [PATCH 35/59] Add option to evaluate treatment model in RLearner --- metalearners/metalearner.py | 10 +++++- metalearners/rlearner.py | 65 ++++++++++++++++++++++++++++++------- tests/test_learner.py | 7 ++++ 3 files changed, 69 insertions(+), 13 deletions(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 6a3c260..4bce635 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -146,6 +146,7 @@ def _evaluate_model_kind( is_oos: bool, is_treatment_model: bool, oos_method: OosMethod = OVERALL, + sample_weights: Sequence[Vector] | None = None, ) -> dict[str, float]: """Helper function to evaluate all the models of the same model kind.""" prefix = f"{model_kind}_" @@ -168,7 +169,14 @@ def _evaluate_model_kind( index_str = f"{i}_" name = f"{prefix}{index_str}{scorer_name}" with _PredictContext(cfe, is_oos, oos_method) as modified_cfe: - evaluation_metrics[name] = scorer_callable(modified_cfe, Xs[i], ys[i]) + if sample_weights: + evaluation_metrics[name] = scorer_callable( + modified_cfe, Xs[i], ys[i], sample_weight=sample_weights[i] + ) + else: + evaluation_metrics[name] = scorer_callable( + modified_cfe, Xs[i], ys[i] + ) return evaluation_metrics diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index aec5917..ee6e45d 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -230,6 +230,7 @@ def fit( treatment_variant=treatment_variant, mask=mask, epsilon=epsilon, + is_oos=False, ) X_filtered = index_matrix(X, mask) @@ -337,8 +338,9 @@ def evaluate( scoring: Scoring | None = None, ) -> dict[str, float]: """In the RLearner case, the ``"treatment_model"`` is always evaluated with the - :func:`~metalearners.rlearner.r_loss` and the ``scoring["treatment_model"]`` - parameter is ignored.""" + :func:`~metalearners.rlearner.r_loss` besides the scorers in + ``scoring["treatment_model"]``, which should support passing the + ``sample_weight`` keyword argument.""" safe_scoring = self._scoring(scoring) propensity_evaluation = _evaluate_model_kind( @@ -382,7 +384,39 @@ def evaluate( if self.is_classification: y_hat = y_hat[:, 1] - treatment_evaluation = {} + pseudo_outcome: list[np.ndarray] = [] + sample_weights: list[np.ndarray] = [] + masks: list[Vector] = [] + is_control = w == 0 + for treatment_variant in range(1, self.n_variants): + is_treatment = w == treatment_variant + mask = is_treatment | is_control + tv_pseudo_outcome, tv_sample_weights = self._pseudo_outcome_and_weights( + X=X, + y=y, + w=w, + treatment_variant=treatment_variant, + is_oos=is_oos, + oos_method=oos_method, + mask=mask, + ) + pseudo_outcome.append(tv_pseudo_outcome) + sample_weights.append(tv_sample_weights) + masks.append(mask) + + treatment_evaluation = _evaluate_model_kind( + self._treatment_models[TREATMENT_MODEL], + Xs=[X[masks[tv - 1]] for tv in range(1, self.n_variants)], + ys=pseudo_outcome, + scorers=safe_scoring[TREATMENT_MODEL], + model_kind=TREATMENT_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment_model=True, + sample_weights=sample_weights, + ) + + rloss_evaluation = {} tau_hat = self.predict(X=X, is_oos=is_oos, oos_method=oos_method) is_control = w == 0 for treatment_variant in range(1, self.n_variants): @@ -397,15 +431,19 @@ def evaluate( if self.is_classification else tau_hat[:, treatment_variant - 1, 0] ) - 
treatment_evaluation[f"r_loss_{treatment_variant}_vs_0"] = r_loss( + rloss_evaluation[f"r_loss_{treatment_variant}_vs_0"] = r_loss( cate_estimates=cate_estimates[mask], outcome_estimates=y_hat[mask], propensity_scores=propensity_estimates[mask], outcomes=y[mask], treatments=w[mask] == treatment_variant, ) - - return propensity_evaluation | outcome_evaluation | treatment_evaluation + return ( + propensity_evaluation + | outcome_evaluation + | rloss_evaluation + | treatment_evaluation + ) def _pseudo_outcome_and_weights( self, @@ -413,15 +451,13 @@ def _pseudo_outcome_and_weights( y: Vector, w: Vector, treatment_variant: int, + is_oos: bool, + oos_method: OosMethod = OVERALL, mask: Vector | None = None, epsilon: float = _EPSILON, ) -> tuple[np.ndarray, np.ndarray]: """Compute the R-Learner pseudo outcome and corresponding weights. - Importantly, this method assumes to be applied on in-sample data. - In other words, ``is_oos`` will always be set to ``False`` when calling - ``predict_nuisance``. - If ``mask`` is provided, the retuned pseudo outcomes and weights are only with respect the observations that the mask selects. @@ -437,12 +473,17 @@ def _pseudo_outcome_and_weights( # be able to match original observations with their corresponding folds. y_estimates = self.predict_nuisance( X=X, - is_oos=False, + is_oos=is_oos, model_kind=OUTCOME_MODEL, model_ord=0, + oos_method=oos_method, )[mask] w_estimates = self.predict_nuisance( - X=X, is_oos=False, model_kind=PROPENSITY_MODEL, model_ord=0 + X=X, + is_oos=is_oos, + model_kind=PROPENSITY_MODEL, + model_ord=0, + oos_method=oos_method, )[mask] w_estimates_binarized = w_estimates[:, treatment_variant] / ( w_estimates[:, 0] + w_estimates[:, treatment_variant] diff --git a/tests/test_learner.py b/tests/test_learner.py index c72c961..f001eda 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -372,6 +372,10 @@ def test_learner_evaluate( assert ( {f"r_loss_{i}_vs_0" for i in range(1, n_variants)} | {"propensity_model_neg_log_loss"} + | { + f"treatment_model_{i}_vs_0_neg_root_mean_squared_error" + for i in range(1, n_variants) + } ) <= set(evaluation.keys()) elif metalearner == "X": assert "propensity_model_neg_log_loss" in evaluation @@ -446,11 +450,14 @@ def new_score_2(y, y_pred): { "outcome_model": [make_scorer(new_score_2)], "propensity_model": [], + "treatment_model": ["neg_mean_absolute_error"], }, { "outcome_model_custom_scorer_0", "r_loss_1_vs_0", "r_loss_2_vs_0", + "treatment_model_1_vs_0_neg_mean_absolute_error", + "treatment_model_2_vs_0_neg_mean_absolute_error", }, ), ( From a14932c1f212e576d36d269a6b938f85a6403127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 27 Jun 2024 09:16:44 +0200 Subject: [PATCH 36/59] Update metalearners/metalearner_grid_search_cv.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner_grid_search_cv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner_grid_search_cv.py b/metalearners/metalearner_grid_search_cv.py index aa32294..c483baa 100644 --- a/metalearners/metalearner_grid_search_cv.py +++ b/metalearners/metalearner_grid_search_cv.py @@ -75,7 +75,7 @@ def _fit_and_score(job: _FitAndScoreJob) -> _CVResult: ) -def _format_results(results: Sequence[_CVResult]): +def _format_results(results: Sequence[_CVResult]) -> pd.DataFrame: rows = [] for result in results: row: dict[str, str | int | float] = {} From 
003e6cecd2e7cf7e7b7363473c030ad245dbb122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 27 Jun 2024 09:19:23 +0200 Subject: [PATCH 37/59] Update metalearners/metalearner_grid_search_cv.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner_grid_search_cv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner_grid_search_cv.py b/metalearners/metalearner_grid_search_cv.py index c483baa..6c61470 100644 --- a/metalearners/metalearner_grid_search_cv.py +++ b/metalearners/metalearner_grid_search_cv.py @@ -121,7 +121,7 @@ class MetaLearnerGridSearchCV: Importantly, ``random_state`` must be passed through the ``random_state`` parameter and not through ``metalearner_params``. - ``base_learner_grid`` keys should be the names of all the models contained in the MetaLearner + ``base_learner_grid`` keys should be the names of all the base models contained in the :class:`~metalearners.metalearners.MetaLearner` defined by ``metalearner_factory``, for information about this names check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` and :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`. The From 64d2ebf012d5ac4ebe957dd5a4d3093c26528b44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 27 Jun 2024 09:19:36 +0200 Subject: [PATCH 38/59] Update metalearners/metalearner_grid_search_cv.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner_grid_search_cv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metalearners/metalearner_grid_search_cv.py b/metalearners/metalearner_grid_search_cv.py index 6c61470..e38a99f 100644 --- a/metalearners/metalearner_grid_search_cv.py +++ b/metalearners/metalearner_grid_search_cv.py @@ -154,8 +154,9 @@ class MetaLearnerGridSearchCV: ``verbose`` will be passed to `joblib.Parallel `_. - Check TODO to see an example of the usage of this class. """ + +# TODO: Add a reference to a docs example once it is written. def __init__( self, From 1860254fab83f94c0bb5c7f160a8760a3a4fe95c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 27 Jun 2024 09:24:13 +0200 Subject: [PATCH 39/59] Rename module --- CHANGELOG.rst | 2 +- conda.recipe/recipe.yaml | 1 + .../{metalearner_grid_search_cv.py => grid_search.py} | 5 ++--- ...est_metalearner_grid_search_cv.py => test_grid_search.py} | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) rename metalearners/{metalearner_grid_search_cv.py => grid_search.py} (99%) rename tests/{test_metalearner_grid_search_cv.py => test_grid_search.py} (97%) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index efa47e4..91ce981 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,7 +10,7 @@ Changelog 0.6.0 (2024-06-**) ------------------ -* Implemented :class:`metalearners.metalearner_grid_search_cv.MetaLearnerGridSearchCV`. +* Implemented :class:`metalearners.grid_search.MetaLearnerGridSearchCV`. 
* Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and implemented the abstract method for the :class:`metalearners.XLearner` and diff --git a/conda.recipe/recipe.yaml b/conda.recipe/recipe.yaml index 192234c..3594845 100644 --- a/conda.recipe/recipe.yaml +++ b/conda.recipe/recipe.yaml @@ -45,6 +45,7 @@ tests: - metalearners.rlearner - metalearners.drlearner - metalearners.explainer + - metalearners.grid_search pip_check: true about: diff --git a/metalearners/metalearner_grid_search_cv.py b/metalearners/grid_search.py similarity index 99% rename from metalearners/metalearner_grid_search_cv.py rename to metalearners/grid_search.py index e38a99f..78ba23a 100644 --- a/metalearners/metalearner_grid_search_cv.py +++ b/metalearners/grid_search.py @@ -153,10 +153,9 @@ class MetaLearnerGridSearchCV: For how to define ``scoring`` check :meth:`~metalearners.metalearner.MetaLearner.evaluate`. ``verbose`` will be passed to `joblib.Parallel `_. - """ - -# TODO: Add a reference to a docs example once it is written. + + # TODO: Add a reference to a docs example once it is written. def __init__( self, diff --git a/tests/test_metalearner_grid_search_cv.py b/tests/test_grid_search.py similarity index 97% rename from tests/test_metalearner_grid_search_cv.py rename to tests/test_grid_search.py index e992ce5..5fff566 100644 --- a/tests/test_metalearner_grid_search_cv.py +++ b/tests/test_grid_search.py @@ -7,7 +7,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from metalearners.drlearner import DRLearner -from metalearners.metalearner_grid_search_cv import MetaLearnerGridSearchCV +from metalearners.grid_search import MetaLearnerGridSearchCV from metalearners.rlearner import RLearner from metalearners.slearner import SLearner from metalearners.tlearner import TLearner From c08dd6ae69dd38cdfb96ec2eea78bc40dad631e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 27 Jun 2024 09:27:51 +0200 Subject: [PATCH 40/59] Reuse typing --- metalearners/grid_search.py | 8 ++++---- metalearners/metalearner.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 78ba23a..2352a89 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: BSD-3-Clause import time -from collections.abc import Callable, Mapping, Sequence +from collections.abc import Mapping, Sequence from dataclasses import dataclass from functools import reduce from operator import add @@ -11,7 +11,7 @@ from joblib import Parallel, delayed from sklearn.model_selection import KFold, ParameterGrid -from metalearners._typing import Matrix, OosMethod, Vector, _ScikitModel +from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel from metalearners._utils import index_matrix, index_vector from metalearners.cross_fit_estimator import OVERALL from metalearners.metalearner import PROPENSITY_MODEL, MetaLearner @@ -27,7 +27,7 @@ class _FitAndScoreJob: y_test: Vector w_test: Vector oos_method: OosMethod - scoring: Mapping[str, list[str | Callable]] | None + scoring: Scoring | None kwargs: dict cv_index: int @@ -163,7 +163,7 @@ def __init__( metalearner_params: Mapping, base_learner_grid: Mapping[str, Sequence[type[_ScikitModel]]], param_grid: Mapping[str, Mapping[str, Sequence]], - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, cv: int = 5, n_jobs: int | None = None, 
random_state: int | None = None, diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 4bce635..1efe82b 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: BSD-3-Clause from abc import ABC, abstractmethod -from collections.abc import Callable, Collection, Mapping, Sequence +from collections.abc import Callable, Collection, Sequence from copy import deepcopy from dataclasses import dataclass from typing import TypedDict @@ -856,7 +856,7 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: r"""Evaluate the MetaLearner. From f2edc25cbe50058bb3deb886c8fa4eba858923dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 28 Jun 2024 14:25:44 +0200 Subject: [PATCH 41/59] Use three nested levels to allow different grids --- metalearners/grid_search.py | 49 +++++++++++++++++-------------------- tests/test_grid_search.py | 26 ++++++++++++-------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 2352a89..ed6bcdb 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -4,8 +4,7 @@ import time from collections.abc import Mapping, Sequence from dataclasses import dataclass -from functools import reduce -from operator import add +from typing import Any import pandas as pd from joblib import Parallel, delayed @@ -141,10 +140,14 @@ class MetaLearnerGridSearchCV: } param_grid = { - "LGBMRegressor": {"n_estimators": [1, 2], "verbose": [-1]}, - "LGBMClassifier": { - "n_estimators": [1, 2, 3], - "verbose": [-1], + "propensity_model": { + "LGBMClassifier": {"n_estimators": [1, 2, 3], "verbose": [-1]} + }, + "variant_outcome_model": { + "LGBMRegressor": {"n_estimators": [1, 2], "verbose": [-1]}, + }, + "treatment_model": { + "LGBMRegressor": {"n_estimators": [5, 10], "verbose": [-1]}, }, } @@ -160,9 +163,9 @@ class MetaLearnerGridSearchCV: def __init__( self, metalearner_factory: type[MetaLearner], - metalearner_params: Mapping, + metalearner_params: Mapping[str, Any], base_learner_grid: Mapping[str, Sequence[type[_ScikitModel]]], - param_grid: Mapping[str, Mapping[str, Sequence]], + param_grid: Mapping[str, Mapping[str, Mapping[str, Sequence]]], scoring: Scoring | None = None, cv: int = 5, n_jobs: int | None = None, @@ -185,21 +188,10 @@ def __init__( ) | set(metalearner_factory.treatment_model_specifications().keys()) if set(base_learner_grid.keys()) != expected_base_models: - raise ValueError - - all_base_learners = set(reduce(add, base_learner_grid.values())) - param_grid_empty: Mapping[str, Mapping[str, Sequence]] = { - k.__name__: {} for k in all_base_learners if k.__name__ not in param_grid - } + raise ValueError("base_learner_grid keys don't match the model names.") self.base_learner_grid = list(ParameterGrid(base_learner_grid)) - # Mapping does not have union "|" operator, see - # https://peps.python.org/pep-0584/#what-about-mapping-and-mutablemapping - full_param_grid = {**param_grid_empty, **param_grid} - self.base_learner_param_grids = { - base_learner: list(ParameterGrid(base_learner_param_grid)) - for base_learner, base_learner_param_grid in full_param_grid.items() - } + self.param_grid = param_grid def fit( self, @@ -245,14 +237,17 @@ def fit( for model_kind in treatment_models } propensity_model_factory = base_learners.get(PROPENSITY_MODEL, 
None) - - param_grid = { - model_kind: self.base_learner_param_grids[ - base_learners[model_kind].__name__ - ] + base_learner_param_grids = { + model_kind: list( + ParameterGrid( + self.param_grid.get(model_kind, {}).get( + base_learners[model_kind].__name__, {} + ) + ) + ) for model_kind in all_models } - for params in ParameterGrid(param_grid): + for params in ParameterGrid(base_learner_param_grids): nuisance_model_params = { model_kind: params[model_kind] for model_kind in nuisance_models_no_propensity diff --git a/tests/test_grid_search.py b/tests/test_grid_search.py index 5fff566..e9af864 100644 --- a/tests/test_grid_search.py +++ b/tests/test_grid_search.py @@ -21,21 +21,21 @@ SLearner, False, {"base_model": [LinearRegression, LGBMRegressor]}, - {"LGBMRegressor": {"n_estimators": [1, 2]}}, + {"base_model": {"LGBMRegressor": {"n_estimators": [1, 2]}}}, 3, ), ( SLearner, True, {"base_model": [LogisticRegression, LGBMClassifier]}, - {"LGBMClassifier": {"n_estimators": [1, 2]}}, + {"base_model": {"LGBMClassifier": {"n_estimators": [1, 2]}}}, 3, ), ( TLearner, False, {"variant_outcome_model": [LinearRegression, LGBMRegressor]}, - {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}, + {"variant_outcome_model": {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}}, 4, ), ( @@ -44,11 +44,15 @@ { "variant_outcome_model": [LinearRegression], "propensity_model": [LGBMClassifier], - "control_effect_model": [LinearRegression], - "treatment_effect_model": [LinearRegression], + "control_effect_model": [LGBMRegressor], + "treatment_effect_model": [LGBMRegressor], }, - {"LGBMClassifier": {"n_estimators": [1, 2, 3]}}, - 3, + { + "propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2, 3]}}, + "control_effect_model": {"LGBMRegressor": {"n_estimators": [1, 2]}}, + "treatment_effect_model": {"LGBMRegressor": {"n_estimators": [1]}}, + }, + 6, ), ( RLearner, @@ -59,8 +63,8 @@ "treatment_model": [LGBMRegressor], }, { - "LGBMClassifier": {"n_estimators": [1, 2, 3]}, - "LGBMRegressor": {"n_estimators": [1, 2, 3]}, + "propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2, 3]}}, + "treatment_model": {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}, }, 9, ), @@ -72,7 +76,9 @@ "propensity_model": [LGBMClassifier], "treatment_model": [LinearRegression], }, - {"LGBMClassifier": {"n_estimators": [1, 2, 3, 4]}}, + { + "propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2, 3, 4]}}, + }, 4, ), ], From 3b841e52ae79b0126c01814d7cee584629289002 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 4 Jul 2024 10:14:54 +0200 Subject: [PATCH 42/59] Disable cv to be able to reuse models --- metalearners/grid_search.py | 210 ++++++++++++++++++------------------ tests/test_grid_search.py | 88 +++++++++++++-- 2 files changed, 187 insertions(+), 111 deletions(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index ed6bcdb..3cfdf99 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -8,10 +8,9 @@ import pandas as pd from joblib import Parallel, delayed -from sklearn.model_selection import KFold, ParameterGrid +from sklearn.model_selection import ParameterGrid from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel -from metalearners._utils import index_matrix, index_vector from metalearners.cross_fit_estimator import OVERALL from metalearners.metalearner import PROPENSITY_MODEL, MetaLearner @@ -22,28 +21,26 @@ class _FitAndScoreJob: X_train: Matrix y_train: Vector w_train: Vector - X_test: Matrix - y_test: Vector - 
w_test: Vector + X_test: Matrix | None + y_test: Vector | None + w_test: Vector | None oos_method: OosMethod scoring: Scoring | None kwargs: dict - cv_index: int @dataclass(frozen=True) -class _CVResult: +class _GSResult: r"""Cross Validation Result.""" metalearner: MetaLearner train_scores: dict - test_scores: dict + test_scores: dict | None fit_time: float score_time: float - cv_index: int -def _fit_and_score(job: _FitAndScoreJob) -> _CVResult: +def _fit_and_score(job: _FitAndScoreJob) -> _GSResult: start_time = time.time() job.metalearner.fit(job.X_train, job.y_train, job.w_train, **job.kwargs) fit_time = time.time() - start_time @@ -55,31 +52,36 @@ def _fit_and_score(job: _FitAndScoreJob) -> _CVResult: is_oos=False, scoring=job.scoring, ) - test_scores = job.metalearner.evaluate( - X=job.X_test, - y=job.y_test, - w=job.w_test, - is_oos=True, - oos_method=job.oos_method, - scoring=job.scoring, - ) + if job.X_test is not None and job.y_test is not None and job.w_test is not None: + test_scores = job.metalearner.evaluate( + X=job.X_test, + y=job.y_test, + w=job.w_test, + is_oos=True, + oos_method=job.oos_method, + scoring=job.scoring, + ) + else: + test_scores = None score_time = time.time() - fit_time - return _CVResult( + return _GSResult( metalearner=job.metalearner, fit_time=fit_time, score_time=score_time, train_scores=train_scores, test_scores=test_scores, - cv_index=job.cv_index, ) -def _format_results(results: Sequence[_CVResult]) -> pd.DataFrame: +def _format_results(results: Sequence[_GSResult]) -> pd.DataFrame: rows = [] for result in results: row: dict[str, str | int | float] = {} row["metalearner"] = result.metalearner.__class__.__name__ - nuisance_models = set(result.metalearner.nuisance_model_specifications().keys()) + nuisance_models = ( + set(result.metalearner.nuisance_model_specifications().keys()) + - result.metalearner._prefitted_nuisance_models + ) treatment_models = set( result.metalearner.treatment_model_specifications().keys() ) @@ -99,19 +101,19 @@ def _format_results(results: Sequence[_CVResult]) -> pd.DataFrame: model_kind ].items(): row[f"{model_kind}_{param}"] = value - row["cv_index"] = result.cv_index row["fit_time"] = result.fit_time row["score_time"] = result.score_time for name, value in result.train_scores.items(): row[f"train_{name}"] = value - for name, value in result.test_scores.items(): - row[f"test_{name}"] = value + if result.test_scores is not None: + for name, value in result.test_scores.items(): + row[f"test_{name}"] = value rows.append(row) df = pd.DataFrame(rows) return df -class MetaLearnerGridSearchCV: +class MetaLearnerGridSearch: """Exhaustive search over specified parameter values for a MetaLearner. 
``metalearner_params`` should contain the necessary params for the MetaLearner initialization
@@ -167,7 +169,6 @@ def __init__(
         base_learner_grid: Mapping[str, Sequence[type[_ScikitModel]]],
         param_grid: Mapping[str, Mapping[str, Mapping[str, Sequence]]],
         scoring: Scoring | None = None,
-        cv: int = 5,
         n_jobs: int | None = None,
         random_state: int | None = None,
         verbose: int = 0,
@@ -175,20 +176,30 @@ def __init__(
         self.metalearner_factory = metalearner_factory
         self.metalearner_params = metalearner_params
         self.scoring = scoring
-        self.cv = cv
         self.n_jobs = n_jobs
         self.random_state = random_state
         self.verbose = verbose

-        self.raw_results_: Sequence[_CVResult] | None = None
-        self.cv_results_: pd.DataFrame | None = None
+        self.raw_results_: Sequence[_GSResult] | None = None
+        self.results_: pd.DataFrame | None = None

-        expected_base_models = set(
+        all_base_models = set(
             metalearner_factory.nuisance_model_specifications().keys()
         ) | set(metalearner_factory.treatment_model_specifications().keys())

-        if set(base_learner_grid.keys()) != expected_base_models:
-            raise ValueError("base_learner_grid keys don't match the model names.")
+        self.fitted_models = set(
+            metalearner_params.get("fitted_nuisance_models", {}).keys()
+        )
+        if metalearner_params.get("fitted_propensity_model", None) is not None:
+            self.fitted_models |= {PROPENSITY_MODEL}
+
+        self.models_to_fit = all_base_models - self.fitted_models
+
+        if set(base_learner_grid.keys()) != self.models_to_fit:
+            raise ValueError(
+                "base_learner_grid keys don't match the expected model names. base_learner_grid "
+                f"keys were expected to be {self.models_to_fit}."
             )

         self.base_learner_grid = list(ParameterGrid(base_learner_grid))
         self.param_grid = param_grid

     def fit(
         self,
         X: Matrix,
         y: Vector,
         w: Vector,
+        X_test: Matrix | None = None,
+        y_test: Vector | None = None,
+        w_test: Vector | None = None,
         oos_method: OosMethod = OVERALL,
         **kwargs,
     ):
         """Run fit with all sets of parameters.

+        ``X_test``, ``y_test`` and ``w_test`` are optional; if they are passed, all the
+        fitted metalearners will be evaluated on them.
+
         ``kwargs`` will be passed through to the :meth:`~metalearners.metalearner.MetaLearner.fit`
         call of each individual MetaLearner.
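        For orientation, a minimal, self-contained sketch of how the interface at this
        point in the series can be driven; the data, base-learner choices and grid values
        below are assumptions made up for the example, not part of the patch:

            import numpy as np
            from lightgbm import LGBMClassifier, LGBMRegressor

            from metalearners.drlearner import DRLearner
            from metalearners.grid_search import MetaLearnerGridSearch

            rng = np.random.default_rng(0)
            X, X_test = rng.standard_normal((250, 3)), rng.standard_normal((100, 3))
            y, y_test = rng.standard_normal(250), rng.standard_normal(100)
            w, w_test = rng.integers(0, 2, 250), rng.integers(0, 2, 100)

            gs = MetaLearnerGridSearch(
                metalearner_factory=DRLearner,
                metalearner_params={"is_classification": False, "n_variants": 2, "n_folds": 2},
                base_learner_grid={
                    "variant_outcome_model": [LGBMRegressor],
                    "propensity_model": [LGBMClassifier],
                    "treatment_model": [LGBMRegressor],
                },
                param_grid={
                    # Three nested levels: model kind -> base learner class name -> grid.
                    # Models absent from param_grid fall back to default parameters.
                    "treatment_model": {"LGBMRegressor": {"n_estimators": [1, 2], "verbose": [-1]}}
                },
            )
            # Passing the optional test matrices additionally evaluates every fitted
            # MetaLearner out of sample; omitting them yields train scores only.
            gs.fit(X, y, w, X_test, y_test, w_test)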
""" - cv = KFold(n_splits=self.cv, shuffle=True, random_state=self.random_state) + nuisance_models_no_propensity = set.intersection( + set(self.metalearner_factory.nuisance_model_specifications().keys()) + - {PROPENSITY_MODEL}, + self.models_to_fit, + ) - nuisance_models_no_propensity = set( - self.metalearner_factory.nuisance_model_specifications().keys() - ) - {PROPENSITY_MODEL} + # We don't need to intersect as treatment models can't be reused treatment_models = set( self.metalearner_factory.treatment_model_specifications().keys() ) - all_models = set( - self.metalearner_factory.nuisance_model_specifications().keys() - ) | set(self.metalearner_factory.treatment_model_specifications().keys()) - jobs: list[_FitAndScoreJob] = [] - for cv_index, (train_indices, test_indices) in enumerate(cv.split(X)): - X_train = index_matrix(X, train_indices) - X_test = index_matrix(X, test_indices) - y_train = index_vector(y, train_indices) - y_test = index_vector(y, test_indices) - w_train = index_vector(w, train_indices) - w_test = index_vector(w, test_indices) - for base_learners in self.base_learner_grid: - nuisance_model_factory = { - model_kind: base_learners[model_kind] - for model_kind in nuisance_models_no_propensity - } - treatment_model_factory = { - model_kind: base_learners[model_kind] - for model_kind in treatment_models - } - propensity_model_factory = base_learners.get(PROPENSITY_MODEL, None) - base_learner_param_grids = { - model_kind: list( - ParameterGrid( - self.param_grid.get(model_kind, {}).get( - base_learners[model_kind].__name__, {} - ) + + for base_learners in self.base_learner_grid: + nuisance_model_factory = { + model_kind: base_learners[model_kind] + for model_kind in nuisance_models_no_propensity + } + treatment_model_factory = { + model_kind: base_learners[model_kind] for model_kind in treatment_models + } + propensity_model_factory = base_learners.get(PROPENSITY_MODEL, None) + base_learner_param_grids = { + model_kind: list( + ParameterGrid( + self.param_grid.get(model_kind, {}).get( + base_learners[model_kind].__name__, {} ) ) - for model_kind in all_models + ) + for model_kind in self.models_to_fit + } + for params in ParameterGrid(base_learner_param_grids): + nuisance_model_params = { + model_kind: params[model_kind] + for model_kind in nuisance_models_no_propensity } - for params in ParameterGrid(base_learner_param_grids): - nuisance_model_params = { - model_kind: params[model_kind] - for model_kind in nuisance_models_no_propensity - } - treatment_model_params = { - model_kind: params[model_kind] - for model_kind in treatment_models - } - propensity_model_params = params.get(PROPENSITY_MODEL, None) - - ml = self.metalearner_factory( - **self.metalearner_params, - nuisance_model_factory=nuisance_model_factory, - treatment_model_factory=treatment_model_factory, - propensity_model_factory=propensity_model_factory, - nuisance_model_params=nuisance_model_params, - treatment_model_params=treatment_model_params, - propensity_model_params=propensity_model_params, - random_state=self.random_state, - ) - - jobs.append( - _FitAndScoreJob( - metalearner=ml, - X_train=X_train, - y_train=y_train, - w_train=w_train, - X_test=X_test, - y_test=y_test, - w_test=w_test, - oos_method=oos_method, - scoring=self.scoring, - kwargs=kwargs, - cv_index=cv_index, - ) + treatment_model_params = { + model_kind: params[model_kind] for model_kind in treatment_models + } + propensity_model_params = params.get(PROPENSITY_MODEL, None) + + ml = self.metalearner_factory( + **self.metalearner_params, + 
nuisance_model_factory=nuisance_model_factory, + treatment_model_factory=treatment_model_factory, + propensity_model_factory=propensity_model_factory, + nuisance_model_params=nuisance_model_params, + treatment_model_params=treatment_model_params, + propensity_model_params=propensity_model_params, + random_state=self.random_state, + ) + + jobs.append( + _FitAndScoreJob( + metalearner=ml, + X_train=X, + y_train=y, + w_train=w, + X_test=X_test, + y_test=y_test, + w_test=w_test, + oos_method=oos_method, + scoring=self.scoring, + kwargs=kwargs, ) + ) parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose) raw_results = parallel(delayed(_fit_and_score)(job) for job in jobs) self.raw_results_ = raw_results - self.cv_results_ = _format_results(results=raw_results) + self.results_ = _format_results(results=raw_results) diff --git a/tests/test_grid_search.py b/tests/test_grid_search.py index e9af864..762f163 100644 --- a/tests/test_grid_search.py +++ b/tests/test_grid_search.py @@ -7,7 +7,8 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from metalearners.drlearner import DRLearner -from metalearners.grid_search import MetaLearnerGridSearchCV +from metalearners.grid_search import MetaLearnerGridSearch +from metalearners.metalearner import VARIANT_OUTCOME_MODEL from metalearners.rlearner import RLearner from metalearners.slearner import SLearner from metalearners.tlearner import TLearner @@ -84,14 +85,12 @@ ], ) @pytest.mark.parametrize("n_variants", [2, 5]) -@pytest.mark.parametrize("cv", [2, 3]) -def test_metalearnergridsearchcv_smoke( +def test_metalearnergridsearch_smoke( metalearner_factory, is_classification, n_variants, base_learner_grid, param_grid, - cv, rng, expected_n_configs, ): @@ -100,21 +99,92 @@ def test_metalearnergridsearchcv_smoke( "n_variants": n_variants, "n_folds": 2, } - gs = MetaLearnerGridSearchCV( + gs = MetaLearnerGridSearch( metalearner_factory=metalearner_factory, metalearner_params=metalearner_params, base_learner_grid=base_learner_grid, param_grid=param_grid, - cv=cv, ) n_samples = 250 + n_test_samples = 100 X = rng.standard_normal((n_samples, 3)) + X_test = rng.standard_normal((n_test_samples, 3)) if is_classification: y = rng.integers(0, 2, n_samples) + y_test = rng.integers(0, 2, n_test_samples) else: y = rng.standard_normal(n_samples) + y_test = rng.standard_normal(n_test_samples) w = rng.integers(0, n_variants, n_samples) + w_test = rng.integers(0, n_variants, n_test_samples) - gs.fit(X, y, w) - assert gs.cv_results_ is not None - assert gs.cv_results_.shape[0] == expected_n_configs * cv + gs.fit(X, y, w, X_test, y_test, w_test) + assert gs.results_ is not None + assert gs.results_.shape[0] == expected_n_configs + + train_scores_cols = set( + c[6:] for c in list(gs.results_.columns) if c.startswith("train_") + ) + test_scores_cols = set( + c[5:] for c in list(gs.results_.columns) if c.startswith("test_") + ) + assert train_scores_cols == test_scores_cols + + +def test_metalearnergridsearch_reuse_smoke(rng): + n_variants = 3 + n_samples = 250 + n_test_samples = 100 + + X = rng.standard_normal((n_samples, 3)) + X_test = rng.standard_normal((n_test_samples, 3)) + y = rng.standard_normal(n_samples) + y_test = rng.standard_normal(n_test_samples) + w = rng.integers(0, n_variants, n_samples) + w_test = rng.integers(0, n_variants, n_test_samples) + + tl = TLearner( + False, + n_variants, + LGBMRegressor, + nuisance_model_params={"verbose": -1, "n_estimators": 1}, + n_folds=2, + ) + tl.fit(X, y, w) + + gs = MetaLearnerGridSearch( + DRLearner, 
+        {
+            "is_classification": False,
+            "n_variants": n_variants,
+            "n_folds": 5,  # To test with a different n_folds than the pretrained one
+            "fitted_nuisance_models": {
+                VARIANT_OUTCOME_MODEL: tl._nuisance_models[VARIANT_OUTCOME_MODEL]
+            },
+        },
+        {
+            "propensity_model": [LGBMClassifier, LogisticRegression],
+            "treatment_model": [LGBMRegressor],
+        },
+        {
+            "treatment_model": {
+                "LGBMRegressor": {"n_estimators": [1, 2], "verbose": [-1]}
+            },
+            "propensity_model": {
+                "LGBMClassifier": {
+                    "n_estimators": [1, 2, 3],
+                    "verbose": [-1],
+                }
+            },
+        },
+        verbose=3,
+        random_state=1,
+    )
+    gs.fit(X, y, w, X_test, y_test, w_test)
+    assert gs.raw_results_ is not None
+    for raw_result in gs.raw_results_:
+        assert raw_result.metalearner._prefitted_nuisance_models == {
+            VARIANT_OUTCOME_MODEL
+        }
+    assert gs.results_ is not None
+    assert gs.results_.shape[0] == 8

From a7be0cd4222c121bb397caf6449526e10580fb70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?=
Date: Thu, 4 Jul 2024 10:24:54 +0200
Subject: [PATCH 43/59] Add text about reuse in docs

---
 metalearners/grid_search.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py
index 3cfdf99..41429c9 100644
--- a/metalearners/grid_search.py
+++ b/metalearners/grid_search.py
@@ -122,12 +122,15 @@ class MetaLearnerGridSearch:
     Importantly, ``random_state`` must be passed through the ``random_state`` parameter
     and not through ``metalearner_params``.

-    ``base_learner_grid`` keys should be the names of all the base models contained in the :class:`~metalearners.metalearners.MetaLearner`
+    ``base_learner_grid`` keys should be the names of the needed base models contained in the :class:`~metalearners.metalearners.MetaLearner`
     defined by ``metalearner_factory``, for information about these names check
     :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` and
     :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`. The
     values should be sequences of model factories.

+    If models are reused, they should be passed through ``metalearner_params`` and they
+    should not be in ``base_learner_grid``.
+
     ``param_grid`` should contain the parameters grid for each type of model used by the
     base learners defined in ``base_learner_grid``. The keys should be strings with the
     model class name.
An example for optimizing over the :class:`metalearners.DRLearner` From 13eeed14847fd3862c9b18bca4f3a963fcc4874d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 4 Jul 2024 10:50:10 +0200 Subject: [PATCH 44/59] Add test propensity model reuse --- tests/test_grid_search.py | 57 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/tests/test_grid_search.py b/tests/test_grid_search.py index 762f163..aeae5ec 100644 --- a/tests/test_grid_search.py +++ b/tests/test_grid_search.py @@ -8,7 +8,7 @@ from metalearners.drlearner import DRLearner from metalearners.grid_search import MetaLearnerGridSearch -from metalearners.metalearner import VARIANT_OUTCOME_MODEL +from metalearners.metalearner import PROPENSITY_MODEL, VARIANT_OUTCOME_MODEL from metalearners.rlearner import RLearner from metalearners.slearner import SLearner from metalearners.tlearner import TLearner @@ -131,7 +131,7 @@ def test_metalearnergridsearch_smoke( assert train_scores_cols == test_scores_cols -def test_metalearnergridsearch_reuse_smoke(rng): +def test_metalearnergridsearch_reuse_nuisance_smoke(rng): n_variants = 3 n_samples = 250 n_test_samples = 100 @@ -188,3 +188,56 @@ def test_metalearnergridsearch_reuse_smoke(rng): } assert gs.results_ is not None assert gs.results_.shape[0] == 8 + + +def test_metalearnergridsearch_reuse_propensity_smoke(rng): + n_variants = 3 + n_samples = 250 + n_test_samples = 100 + + X = rng.standard_normal((n_samples, 3)) + X_test = rng.standard_normal((n_test_samples, 3)) + y = rng.standard_normal(n_samples) + y_test = rng.standard_normal(n_test_samples) + w = rng.integers(0, n_variants, n_samples) + w_test = rng.integers(0, n_variants, n_test_samples) + + rl = RLearner( + False, + n_variants, + LGBMRegressor, + LGBMRegressor, + LGBMClassifier, + nuisance_model_params={"verbose": -1, "n_estimators": 1}, + treatment_model_params={"verbose": -1, "n_estimators": 1}, + propensity_model_params={"verbose": -1, "n_estimators": 1}, + n_folds=2, + ) + rl.fit(X, y, w) + + gs = MetaLearnerGridSearch( + DRLearner, + { + "is_classification": False, + "n_variants": n_variants, + "n_folds": 5, # To test with different n_folds than the pretrained + "fitted_propensity_model": rl._nuisance_models[PROPENSITY_MODEL][0], + }, + { + "treatment_model": [LGBMRegressor], + "variant_outcome_model": [LinearRegression], + }, + { + "treatment_model": { + "LGBMRegressor": {"n_estimators": [1, 2], "verbose": [-1]} + }, + }, + verbose=3, + random_state=1, + ) + gs.fit(X, y, w, X_test, y_test, w_test) + assert gs.raw_results_ is not None + for raw_result in gs.raw_results_: + assert raw_result.metalearner._prefitted_nuisance_models == {PROPENSITY_MODEL} + assert gs.results_ is not None + assert gs.results_.shape[0] == 2 From 0264937612154c90f0950ff936212b3b0a6f2137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:01:35 +0200 Subject: [PATCH 45/59] Update CHANGELOG.rst Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 037ba3f..b966221 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,7 +12,7 @@ Changelog **New features** -* Implemented :class:`metalearners.grid_search.MetaLearnerGridSearchCV`. +* Implemented :class:`metalearners.grid_search.MetaLearnerGridSearch`. 
* Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and implemented the abstract method for the :class:`metalearners.XLearner` and From bcaab55667cdfac4312d48a900abbba181165729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:01:42 +0200 Subject: [PATCH 46/59] Update metalearners/grid_search.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/grid_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 41429c9..237fe98 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -31,7 +31,7 @@ class _FitAndScoreJob: @dataclass(frozen=True) class _GSResult: - r"""Cross Validation Result.""" + r"""Result from a single grid search evaluation.""" metalearner: MetaLearner train_scores: dict From 8ad4b87f644bee4e0faeb42d9ab64ba1f07198eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:03:47 +0200 Subject: [PATCH 47/59] Update metalearners/grid_search.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/grid_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 237fe98..6c7c3c2 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -158,7 +158,7 @@ class MetaLearnerGridSearch: If some model is not present in ``param_grid``, the default parameters will be used. - For how to define ``scoring`` check :meth:`~metalearners.metalearner.MetaLearner.evaluate`. + For information on how to define ``scoring`` see :meth:`~metalearners.metalearner.MetaLearner.evaluate`. ``verbose`` will be passed to `joblib.Parallel `_. """ From 5e34a35682ed961e82776b1df4eadac7b69a757d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:05:28 +0200 Subject: [PATCH 48/59] Update metalearners/grid_search.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/grid_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 6c7c3c2..90db94b 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -128,7 +128,7 @@ class MetaLearnerGridSearch: :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`. The values should be sequences of model factories. - If models are reused, they should be passed through ``metalearner_params`` and they + If base models are meant to be reused, they should be passed through ``metalearner_params`` and the corresponding keys should not be in ``base_learner_grid``. 
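    To make the reuse mechanics concrete, a minimal sketch mirroring the test added
    earlier in this series; the imports are spelled out, while X, y, w and n_variants
    are assumed to be defined as in the previous sketch:

        from lightgbm import LGBMClassifier, LGBMRegressor

        from metalearners.drlearner import DRLearner
        from metalearners.grid_search import MetaLearnerGridSearch
        from metalearners.metalearner import VARIANT_OUTCOME_MODEL
        from metalearners.tlearner import TLearner

        # Pretrain a TLearner whose variant outcome models we want to recycle.
        tl = TLearner(False, n_variants, LGBMRegressor, n_folds=2)
        tl.fit(X, y, w)

        gs = MetaLearnerGridSearch(
            DRLearner,
            {
                "is_classification": False,
                "n_variants": n_variants,
                # The prefitted models travel via metalearner_params ...
                "fitted_nuisance_models": {
                    VARIANT_OUTCOME_MODEL: tl._nuisance_models[VARIANT_OUTCOME_MODEL]
                },
            },
            # ... and their key is omitted from base_learner_grid.
            {"propensity_model": [LGBMClassifier], "treatment_model": [LGBMRegressor]},
            {"propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2]}}},
        )
        gs.fit(X, y, w)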
``param_grid`` should contain the parameters grid for each type of model used by the From fa953380cbb18cc180723c68180f3b3f704605e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:06:21 +0200 Subject: [PATCH 49/59] Update metalearners/grid_search.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/grid_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 90db94b..12e9e69 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -129,7 +129,7 @@ class MetaLearnerGridSearch: values should be sequences of model factories. If base models are meant to be reused, they should be passed through ``metalearner_params`` and the corresponding keys - should not be in ``base_learner_grid``. + should not be passed to ``base_learner_grid``. ``param_grid`` should contain the parameters grid for each type of model used by the base learners defined in ``base_learner_grid``. The keys should be strings with the From bac8cfbb143e95fba711e5faeec4824a91d6c417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:45:57 +0200 Subject: [PATCH 50/59] Update metalearners/grid_search.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/grid_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 12e9e69..9e9105b 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -226,7 +226,7 @@ def fit( ``kwargs`` will be passed through to the :meth:`~metalearners.metalearner.MetaLearner.fit` call of each individual MetaLearner. 
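        In code, the forwarding amounts to the line below; ``n_jobs_cross_fitting`` is
        used purely as a plausible example of a MetaLearner fit keyword and should be
        treated as an assumption, since the accepted keywords are defined by each
        concrete MetaLearner's ``fit``:

            # Any extra keyword arguments given to MetaLearnerGridSearch.fit are
            # passed verbatim to every individual MetaLearner.fit call.
            gs.fit(X, y, w, X_test, y_test, w_test, n_jobs_cross_fitting=2)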
""" - nuisance_models_no_propensity = set.intersection( + nuisance_models_wo_propensity = set.intersection( set(self.metalearner_factory.nuisance_model_specifications().keys()) - {PROPENSITY_MODEL}, self.models_to_fit, From 928edd7208c93b15a1de34cf04711f5d4b4c697e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 4 Jul 2024 12:46:23 +0200 Subject: [PATCH 51/59] Adapt var name --- metalearners/grid_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 9e9105b..60f3939 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -242,7 +242,7 @@ def fit( for base_learners in self.base_learner_grid: nuisance_model_factory = { model_kind: base_learners[model_kind] - for model_kind in nuisance_models_no_propensity + for model_kind in nuisance_models_wo_propensity } treatment_model_factory = { model_kind: base_learners[model_kind] for model_kind in treatment_models @@ -261,7 +261,7 @@ def fit( for params in ParameterGrid(base_learner_param_grids): nuisance_model_params = { model_kind: params[model_kind] - for model_kind in nuisance_models_no_propensity + for model_kind in nuisance_models_wo_propensity } treatment_model_params = { model_kind: params[model_kind] for model_kind in treatment_models From 83f0e78fc6fdf6ad3f1f5838cecb9a2bdc800131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 4 Jul 2024 12:47:35 +0200 Subject: [PATCH 52/59] Use & --- metalearners/grid_search.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 60f3939..22bbe0b 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -226,11 +226,10 @@ def fit( ``kwargs`` will be passed through to the :meth:`~metalearners.metalearner.MetaLearner.fit` call of each individual MetaLearner. """ - nuisance_models_wo_propensity = set.intersection( + nuisance_models_wo_propensity = ( set(self.metalearner_factory.nuisance_model_specifications().keys()) - - {PROPENSITY_MODEL}, - self.models_to_fit, - ) + - {PROPENSITY_MODEL} + ) & self.models_to_fit # We don't need to intersect as treatment models can't be reused treatment_models = set( From d6c8c3ffd8e2046fd0034f14a8821cf6e95f4c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 4 Jul 2024 12:49:52 +0200 Subject: [PATCH 53/59] Use ParameterGrid in fit and not init --- metalearners/grid_search.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 22bbe0b..2dcabd8 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -203,8 +203,7 @@ def __init__( "base_learner_grid keys don't match the expected model names. base_learner_grid " f"keys were expected to be {self.models_to_fit}." 
) - self.base_learner_grid = list(ParameterGrid(base_learner_grid)) - + self.base_learner_grid = base_learner_grid self.param_grid = param_grid def fit( @@ -238,7 +237,7 @@ def fit( jobs: list[_FitAndScoreJob] = [] - for base_learners in self.base_learner_grid: + for base_learners in ParameterGrid(self.base_learner_grid): nuisance_model_factory = { model_kind: base_learners[model_kind] for model_kind in nuisance_models_wo_propensity From acade9ea8ab6324c35f2aa977fcc2032eab4b0bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 4 Jul 2024 13:01:10 +0200 Subject: [PATCH 54/59] Use fixture grid_search_data --- tests/conftest.py | 22 ++++++++++++++++ tests/test_grid_search.py | 53 ++++++++++++--------------------------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fa44727..e2d6c79 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -221,3 +221,25 @@ def feature_importance_dataset(): w = pd.Series(w) return X, y, w + + +@pytest.fixture(scope="session") +def grid_search_data(): + rng = np.random.default_rng(_SEED) + n_samples = 250 + n_test_samples = 100 + n_features = 3 + n_variants = 4 + X = rng.standard_normal((n_samples, n_features)) + X_test = rng.standard_normal((n_test_samples, n_features)) + + y_class = rng.integers(0, 2, n_samples) + y_test_class = rng.integers(0, 2, n_test_samples) + + y_reg = rng.standard_normal(n_samples) + y_test_reg = rng.standard_normal(n_test_samples) + + w = rng.integers(0, n_variants, n_samples) + w_test = rng.integers(0, n_variants, n_test_samples) + + return X, y_class, y_reg, w, X_test, y_test_class, y_test_reg, w_test diff --git a/tests/test_grid_search.py b/tests/test_grid_search.py index aeae5ec..93ffff0 100644 --- a/tests/test_grid_search.py +++ b/tests/test_grid_search.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: BSD-3-Clause +import numpy as np import pytest from lightgbm import LGBMClassifier, LGBMRegressor from sklearn.linear_model import LinearRegression, LogisticRegression @@ -84,16 +85,22 @@ ), ], ) -@pytest.mark.parametrize("n_variants", [2, 5]) def test_metalearnergridsearch_smoke( metalearner_factory, is_classification, - n_variants, base_learner_grid, param_grid, - rng, expected_n_configs, + grid_search_data, ): + X, y_class, y_reg, w, X_test, y_test_class, y_test_reg, w_test = grid_search_data + if is_classification: + y = y_class + y_test = y_test_class + else: + y = y_reg + y_test = y_test_reg + n_variants = len(np.unique(w)) metalearner_params = { "is_classification": is_classification, "n_variants": n_variants, @@ -105,18 +112,6 @@ def test_metalearnergridsearch_smoke( base_learner_grid=base_learner_grid, param_grid=param_grid, ) - n_samples = 250 - n_test_samples = 100 - X = rng.standard_normal((n_samples, 3)) - X_test = rng.standard_normal((n_test_samples, 3)) - if is_classification: - y = rng.integers(0, 2, n_samples) - y_test = rng.integers(0, 2, n_test_samples) - else: - y = rng.standard_normal(n_samples) - y_test = rng.standard_normal(n_test_samples) - w = rng.integers(0, n_variants, n_samples) - w_test = rng.integers(0, n_variants, n_test_samples) gs.fit(X, y, w, X_test, y_test, w_test) assert gs.results_ is not None @@ -131,17 +126,9 @@ def test_metalearnergridsearch_smoke( assert train_scores_cols == test_scores_cols -def test_metalearnergridsearch_reuse_nuisance_smoke(rng): - n_variants = 3 - n_samples = 250 - n_test_samples = 100 - - X = rng.standard_normal((n_samples, 3)) - X_test = rng.standard_normal((n_test_samples, 3)) 
-    y = rng.standard_normal(n_samples)
-    y_test = rng.standard_normal(n_test_samples)
-    w = rng.integers(0, n_variants, n_samples)
-    w_test = rng.integers(0, n_variants, n_test_samples)
+def test_metalearnergridsearch_reuse_propensity_smoke(grid_search_data):
+    X, _, y, w, X_test, _, y_test, w_test = grid_search_data
+    n_variants = len(np.unique(w))

     rl = RLearner(
         False,

From 5a6c91fb2cc6e4482d3b471786e34a7a056a012 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?=
Date: Thu, 4 Jul 2024 13:08:32 +0200
Subject: [PATCH 55/59] Add doc about results_

---
 metalearners/grid_search.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py
index 2dcabd8..efecfbb 100644
--- a/metalearners/grid_search.py
+++ b/metalearners/grid_search.py
@@ -161,6 +161,8 @@ class MetaLearnerGridSearch:
     For information on how to define ``scoring`` see :meth:`~metalearners.metalearner.MetaLearner.evaluate`.

     ``verbose`` will be passed to `joblib.Parallel `_.
+
+    After fitting, a dataframe with the results will be available in ``results_``.
     """

     # TODO: Add a reference to a docs example once it is written.
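Continuing the earlier sketch, inspecting the new attribute after a fit could look as
follows; the column selections are illustrative, the exact layout is produced by
_format_results above:

    gs.fit(X, y, w, X_test, y_test, w_test)
    # One row per fitted configuration: train_/test_ prefixed columns hold the
    # scores, while fit_time and score_time hold the measured runtimes.
    print(gs.results_[["fit_time", "score_time"]])
    print([c for c in gs.results_.columns if c.startswith("train_")])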
From 991b2f1696d0216339dffe12bb481d1ffce5d609 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 4 Jul 2024 13:21:20 +0200 Subject: [PATCH 56/59] Index dataframe with config --- metalearners/grid_search.py | 8 ++++++++ tests/test_grid_search.py | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index efecfbb..60e2dad 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -110,6 +110,14 @@ def _format_results(results: Sequence[_GSResult]) -> pd.DataFrame: row[f"test_{name}"] = value rows.append(row) df = pd.DataFrame(rows) + index_columns = [ + c + for c in df.columns + if not c.endswith("_time") + and not c.startswith("train_") + and not c.startswith("test_") + ] + df = df.set_index(index_columns) return df diff --git a/tests/test_grid_search.py b/tests/test_grid_search.py index 93ffff0..fd953ff 100644 --- a/tests/test_grid_search.py +++ b/tests/test_grid_search.py @@ -17,7 +17,7 @@ @pytest.mark.parametrize( - "metalearner_factory, is_classification, base_learner_grid, param_grid, expected_n_configs", + "metalearner_factory, is_classification, base_learner_grid, param_grid, expected_n_configs, expected_index_cols", [ ( SLearner, @@ -25,6 +25,7 @@ {"base_model": [LinearRegression, LGBMRegressor]}, {"base_model": {"LGBMRegressor": {"n_estimators": [1, 2]}}}, 3, + 3, ), ( SLearner, @@ -32,6 +33,7 @@ {"base_model": [LogisticRegression, LGBMClassifier]}, {"base_model": {"LGBMClassifier": {"n_estimators": [1, 2]}}}, 3, + 3, ), ( TLearner, @@ -39,6 +41,7 @@ {"variant_outcome_model": [LinearRegression, LGBMRegressor]}, {"variant_outcome_model": {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}}, 4, + 3, ), ( XLearner, @@ -55,6 +58,7 @@ "treatment_effect_model": {"LGBMRegressor": {"n_estimators": [1]}}, }, 6, + 8, ), ( RLearner, @@ -66,9 +70,12 @@ }, { "propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2, 3]}}, - "treatment_model": {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}, + "treatment_model": { + "LGBMRegressor": {"n_estimators": [1, 2, 3], "learning_rate": [0.4]} + }, }, 9, + 7, ), ( DRLearner, @@ -82,6 +89,7 @@ "propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2, 3, 4]}}, }, 4, + 5, ), ], ) @@ -91,6 +99,7 @@ def test_metalearnergridsearch_smoke( base_learner_grid, param_grid, expected_n_configs, + expected_index_cols, grid_search_data, ): X, y_class, y_reg, w, X_test, y_test_class, y_test_reg, w_test = grid_search_data @@ -116,6 +125,7 @@ def test_metalearnergridsearch_smoke( gs.fit(X, y, w, X_test, y_test, w_test) assert gs.results_ is not None assert gs.results_.shape[0] == expected_n_configs + assert len(gs.results_.index.names) == expected_index_cols train_scores_cols = set( c[6:] for c in list(gs.results_.columns) if c.startswith("train_") @@ -175,6 +185,7 @@ def test_metalearnergridsearch_reuse_nuisance_smoke(grid_search_data): } assert gs.results_ is not None assert gs.results_.shape[0] == 8 + assert len(gs.results_.index.names) == 7 def test_metalearnergridsearch_reuse_propensity_smoke(grid_search_data): @@ -220,3 +231,4 @@ def test_metalearnergridsearch_reuse_propensity_smoke(grid_search_data): assert raw_result.metalearner._prefitted_nuisance_models == {PROPENSITY_MODEL} assert gs.results_ is not None assert gs.results_.shape[0] == 2 + assert len(gs.results_.index.names) == 5 From 29db2bb7c54e9207077b54fdc55a92c78c81467f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 4 Jul 2024 
14:29:24 +0200
Subject: [PATCH 57/59] Rename kwargs to metalearner_fit_params

---
 metalearners/grid_search.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py
index 60e2dad..22f1c97 100644
--- a/metalearners/grid_search.py
+++ b/metalearners/grid_search.py
@@ -26,7 +26,9 @@ class _FitAndScoreJob:
     w_test: Vector | None
     oos_method: OosMethod
     scoring: Scoring | None
-    kwargs: dict
+    # These are the params which are passed through kwargs in MetaLearnerGridSearch.fit
+    # and which should be unpacked and passed to MetaLearner.fit.
+    metalearner_fit_params: dict[str, Any]

 @dataclass(frozen=True)
@@ -42,7 +44,9 @@ class _GSResult:

 def _fit_and_score(job: _FitAndScoreJob) -> _GSResult:
     start_time = time.time()
-    job.metalearner.fit(job.X_train, job.y_train, job.w_train, **job.kwargs)
+    job.metalearner.fit(
+        job.X_train, job.y_train, job.w_train, **job.metalearner_fit_params
+    )
     fit_time = time.time() - start_time

     train_scores = job.metalearner.evaluate(
@@ -298,7 +302,7 @@ def fit(
                         w_test=w_test,
                         oos_method=oos_method,
                         scoring=self.scoring,
-                        kwargs=kwargs,
+                        metalearner_fit_params=kwargs,
                     )
                 )

From 669f37f83c034bce59506bd82194f7868db7e4ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?=
Date: Fri, 5 Jul 2024 08:46:58 +0200
Subject: [PATCH 58/59] Rephrase docs

---
 metalearners/grid_search.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py
index 22f1c97..1ad8623 100644
--- a/metalearners/grid_search.py
+++ b/metalearners/grid_search.py
@@ -129,8 +129,9 @@ class MetaLearnerGridSearch:
     """Exhaustive search over specified parameter values for a MetaLearner.

     ``metalearner_params`` should contain the necessary params for the MetaLearner initialization
-    such as ``n_variants`` and ``is_classification``. It can also contain optional parameters
-    that all MetaLearners should be initialized with such as ``n_folds`` or ``feature_set``.
+    such as ``n_variants`` and ``is_classification``. If one wants to pass optional parameters
+    to the ``MetaLearner`` initialization, such as ``n_folds`` or ``feature_set``, this should
+    be done this way, too.

     Importantly, ``random_state`` must be passed through the ``random_state`` parameter
     and not through ``metalearner_params``.

From 7b971737dff24c9fcc72004b2837af2b50896614 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?=
Date: Fri, 5 Jul 2024 08:47:17 +0200
Subject: [PATCH 59/59] Spacing docs

---
 metalearners/grid_search.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py
index 1ad8623..21afde4 100644
--- a/metalearners/grid_search.py
+++ b/metalearners/grid_search.py
@@ -135,14 +135,15 @@ class MetaLearnerGridSearch:
     Importantly, ``random_state`` must be passed through the ``random_state`` parameter
     and not through ``metalearner_params``.
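    A brief sketch of that distinction, with base_learner_grid and param_grid as
    placeholders for grids defined elsewhere:

        # Supported: random_state as a first-class argument of the grid search.
        gs = MetaLearnerGridSearch(
            DRLearner,
            {"is_classification": False, "n_variants": 2},
            base_learner_grid,
            param_grid,
            random_state=42,
        )
        # Not supported: passing it inside metalearner_params instead, e.g.
        # MetaLearnerGridSearch(DRLearner, {"n_variants": 2, "random_state": 42}, ...).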
-    ``base_learner_grid`` keys should be the names of the needed base models contained in the :class:`~metalearners.metalearners.MetaLearner`
-    defined by ``metalearner_factory``, for information about these names check
+    ``base_learner_grid`` keys should be the names of the needed base models contained in the
+    :class:`~metalearners.metalearners.MetaLearner` defined by ``metalearner_factory``, for
+    information about these names check
     :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` and
     :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`. The
     values should be sequences of model factories.

-    If base models are meant to be reused, they should be passed through ``metalearner_params`` and the corresponding keys
-    should not be passed to ``base_learner_grid``.
+    If base models are meant to be reused, they should be passed through ``metalearner_params``
+    and the corresponding keys should not be passed to ``base_learner_grid``.

     ``param_grid`` should contain the parameters grid for each type of model used by the
     base learners defined in ``base_learner_grid``. The keys should be strings with the