From e8b64e6bcfc1ba811dc45485eac5e466b4bb856f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:53:47 +0200 Subject: [PATCH 01/32] Speedup tests Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- tests/conftest.py | 80 +++++++++++++++++++++++++-------------- tests/test_learner.py | 11 +++--- tests/test_metalearner.py | 11 +++--- 3 files changed, 63 insertions(+), 39 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 449ab24..862131b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -75,8 +75,9 @@ def mindset_data(): return load_mindset_data() -@pytest.fixture(scope="function") -def twins_data(rng): +@pytest.fixture(scope="session") +def twins_data(): + rng = np.random.default_rng(_SEED) ( chosen_df, outcome_column, @@ -94,28 +95,30 @@ def twins_data(rng): ) -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def n_numericals(): return 25 -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def n_categoricals(): return 5 -@pytest.fixture(scope="module") +@pytest.fixture(scope="session") def sample_size(): return 100_000 -@pytest.fixture(scope="function") -def numerical_covariates(sample_size, n_numericals, rng): +@pytest.fixture(scope="session") +def numerical_covariates(sample_size, n_numericals): + rng = np.random.default_rng(_SEED) return generate_covariates(sample_size, n_numericals, format="numpy", rng=rng) -@pytest.fixture(scope="function") -def mixed_covariates(sample_size, n_numericals, n_categoricals, rng): +@pytest.fixture(scope="session") +def mixed_covariates(sample_size, n_numericals, n_categoricals): + rng = np.random.default_rng(_SEED) return generate_covariates( sample_size, n_numericals + n_categoricals, @@ -125,52 +128,72 @@ def mixed_covariates(sample_size, n_numericals, n_categoricals, rng): ) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def numerical_experiment_dataset_continuous_outcome_binary_treatment_linear_te( - numerical_covariates, rng + sample_size, n_numericals ): - covariates, _, _ = numerical_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, n_numericals, format="numpy", rng=rng + ) return _generate_rct_experiment_data(covariates, False, rng, 0.3, None) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def numerical_experiment_dataset_binary_outcome_binary_treatment_linear_te( - numerical_covariates, rng + sample_size, n_numericals ): - covariates, _, _ = numerical_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, n_numericals, format="numpy", rng=rng + ) return _generate_rct_experiment_data(covariates, True, rng, 0.3, None) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def mixed_experiment_dataset_continuous_outcome_binary_treatment_linear_te( - mixed_covariates, rng + sample_size, n_numericals, n_categoricals ): - covariates, _, _ = mixed_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, + n_numericals + n_categoricals, + n_categoricals=n_categoricals, + format="pandas", + rng=rng, + ) return _generate_rct_experiment_data(covariates, False, rng, 0.3, None) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def numerical_experiment_dataset_continuous_outcome_multi_treatment_linear_te( - numerical_covariates, rng + sample_size, 
n_numericals ): - covariates, _, _ = numerical_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, n_numericals, format="numpy", rng=rng + ) return _generate_rct_experiment_data( covariates, False, rng, [0.2, 0.1, 0.3, 0.15, 0.25], None ) -@pytest.fixture(scope="function") +@pytest.fixture(scope="session") def numerical_experiment_dataset_continuous_outcome_multi_treatment_constant_te( - numerical_covariates, rng + sample_size, n_numericals ): - covariates, _, _ = numerical_covariates + rng = np.random.default_rng(_SEED) + covariates, _, _ = generate_covariates( + sample_size, n_numericals, format="numpy", rng=rng + ) return _generate_rct_experiment_data( covariates, False, rng, [0.2, 0.1, 0.3, 0.15, 0.25], np.array([-2, 5, 0, 3]) ) -@pytest.fixture -def dummy_dataset(rng): +@pytest.fixture(scope="session") +def dummy_dataset(): + rng = np.random.default_rng(_SEED) sample_size = 100 n_features = 10 X = rng.standard_normal((sample_size, n_features)) @@ -179,8 +202,9 @@ def dummy_dataset(rng): return X, y, w -@pytest.fixture(scope="function") -def feature_importance_dataset(rng): +@pytest.fixture(scope="session") +def feature_importance_dataset(): + rng = np.random.default_rng(_SEED) n_samples = 10000 x0 = rng.normal(10, 1, n_samples) x1 = rng.normal(2, 1, n_samples) diff --git a/tests/test_learner.py b/tests/test_learner.py index e76018e..e40a167 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -312,9 +312,8 @@ def test_learner_twins(metalearner, reference_value, twins_data, rng): @pytest.mark.parametrize("n_classes", [2, 5, 10]) @pytest.mark.parametrize("n_variants", [2, 5]) @pytest.mark.parametrize("is_classification", [True, False]) -def test_learner_evaluate( - metalearner, is_classification, rng, sample_size, n_classes, n_variants -): +def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_variants): + sample_size = 1000 factory = metalearner_factory(metalearner) if n_variants > 2 and not factory._supports_multi_treatment(): pytest.skip() @@ -617,8 +616,9 @@ def test_conditional_average_outcomes_smoke( @pytest.mark.parametrize("n_classes", [5, 10]) @pytest.mark.parametrize("n_variants", [2, 5]) def test_conditional_average_outcomes_smoke_multi_class( - metalearner_prefix, rng, sample_size, n_classes, n_variants + metalearner_prefix, rng, n_classes, n_variants ): + sample_size = 1000 factory = metalearner_factory(metalearner_prefix) X = rng.standard_normal((sample_size, 10)) @@ -648,8 +648,9 @@ def test_conditional_average_outcomes_smoke_multi_class( @pytest.mark.parametrize("n_variants", [2, 5]) @pytest.mark.parametrize("is_classification", [True, False]) def test_predict_smoke( - metalearner_prefix, is_classification, rng, sample_size, n_classes, n_variants + metalearner_prefix, is_classification, rng, n_classes, n_variants ): + sample_size = 1000 factory = metalearner_factory(metalearner_prefix) if n_variants > 2 and not factory._supports_multi_treatment(): pytest.skip() diff --git a/tests/test_metalearner.py b/tests/test_metalearner.py index 9de1af3..2e89913 100644 --- a/tests/test_metalearner.py +++ b/tests/test_metalearner.py @@ -152,7 +152,7 @@ def test_metalearner_init( @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_categorical( mixed_experiment_dataset_continuous_outcome_binary_treatment_linear_te, @@ -198,7 +198,7 @@ def 
test_metalearner_categorical( @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_missing_data_smoke( mixed_experiment_dataset_continuous_outcome_binary_treatment_linear_te, @@ -227,7 +227,7 @@ def test_metalearner_missing_data_smoke( @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_missing_data_error( numerical_experiment_dataset_continuous_outcome_binary_treatment_linear_te, @@ -258,7 +258,7 @@ def test_metalearner_missing_data_error( @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_format_consistent( numerical_experiment_dataset_continuous_outcome_binary_treatment_linear_te, @@ -345,7 +345,7 @@ def test_n_folds(n_folds): @pytest.mark.parametrize( "implementation", - [_TestMetaLearner, TLearner, SLearner, XLearner, RLearner, DRLearner], + [TLearner, SLearner, XLearner, RLearner, DRLearner], ) def test_metalearner_model_names(implementation): set1 = set(implementation.nuisance_model_specifications().keys()) @@ -702,7 +702,6 @@ def test_fit_params_rlearner_error(dummy_dataset): @pytest.mark.parametrize( "implementation, needs_estimates", [ - (_TestMetaLearner, True), (TLearner, True), (SLearner, True), (XLearner, True), From 7a11445b3b1f0c607221ac5b56aacac74e0a35da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:58:48 +0200 Subject: [PATCH 02/32] Switch `strict` meaning in `validate_number_positive` --- metalearners/_utils.py | 19 +++++++++++++------ metalearners/cross_fit_estimator.py | 2 +- tests/test_cross_fit_estimator.py | 4 +++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 00eecfb..0aca691 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -1,7 +1,6 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -import operator from collections.abc import Callable from inspect import signature from operator import le, lt @@ -66,14 +65,22 @@ def validate_all_vectors_same_index(*args: Vector) -> None: def validate_number_positive( - value: int | float, name: str, strict: bool = False + value: int | float, name: str, strict: bool = True ) -> None: + """Validates that a number is positive. + + If ``strict = True`` then it validates that the number is strictly positive. + """ if strict: - comparison = operator.lt + if value <= 0: + raise ValueError( + f"{name} was expected to be strictly positive but was {value}." + ) else: - comparison = operator.le - if comparison(value, 0): - raise ValueError(f"{name} was expected to be positive but was {value}.") + if value < 0: + raise ValueError( + f"{name} was expected to be positive or zero but was {value}." 
+ ) def check_propensity_score( diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py index e26d898..9765aa7 100644 --- a/metalearners/cross_fit_estimator.py +++ b/metalearners/cross_fit_estimator.py @@ -56,7 +56,7 @@ def _validate_data_match_prior_split( ) -> None: """Validate whether the previous test_indices and the passed data are based on the same number of observations.""" - validate_number_positive(n_observations, "n_observations", strict=False) + validate_number_positive(n_observations, "n_observations", strict=True) if test_indices is None: return expected_n_observations = sum(len(x) for x in test_indices) diff --git a/tests/test_cross_fit_estimator.py b/tests/test_cross_fit_estimator.py index 8e34b00..bb102c5 100644 --- a/tests/test_cross_fit_estimator.py +++ b/tests/test_cross_fit_estimator.py @@ -223,7 +223,9 @@ def test_crossfitestimator_n_folds_1(rng, sample_size): ) def test_validate_data_match(n_observations, test_indices, success): if n_observations < 1: - with pytest.raises(ValueError, match="was expected to be positive"): + with pytest.raises( + ValueError, match=r"was expected to be (strictly )?positive" + ): _validate_data_match_prior_split(n_observations, test_indices) return if success: From 642cb2e21b2ddb13dd6f7fa7acc63df0db43e2a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 13:26:40 +0200 Subject: [PATCH 03/32] Add classes_ to cfe --- metalearners/cross_fit_estimator.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py index e26d898..97ef105 100644 --- a/metalearners/cross_fit_estimator.py +++ b/metalearners/cross_fit_estimator.py @@ -101,6 +101,7 @@ class CrossFitEstimator: _overall_estimator: _ScikitModel | None = field(init=False) _test_indices: tuple[np.ndarray] | None = field(init=False) _n_classes: int | None = field(init=False) + classes_: np.ndarray | None = field(init=False) def __post_init__(self): _validate_n_folds(self.n_folds) @@ -115,6 +116,7 @@ def __post_init__(self): self._overall_estimator: _ScikitModel | None = None self._test_indices: tuple[np.ndarray] | None = None self._n_classes: int | None = None + self.classes_: np.ndarray | None = None def _train_overall_estimator( self, X: Matrix, y: Matrix | Vector, fit_params: dict | None = None @@ -189,7 +191,14 @@ def fit( if is_classifier(self): self._n_classes = len(np.unique(y)) - + self.classes_ = np.unique(y) + for e in self._estimators: + if set(e.classes_) != set(self.classes_): # type: ignore + raise ValueError( + "Some cross fit estimators training data had less classes than " + "the overall estimator. Please check the cv parameter. If you are " + "synchronizing the folds in a MetaLearner consider not doing it." 
+ ) return self def _initialize_prediction_tensor( From d7cef73591065382e91673b106c25fe02a6b0f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Thu, 13 Jun 2024 08:41:21 +0200 Subject: [PATCH 04/32] Fix RLoss calculation in evaluate --- metalearners/rlearner.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index bf39caa..c139dd9 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -335,6 +335,10 @@ def evaluate( treatment_evaluation = {} tau_hat = self.predict(X=X, is_oos=is_oos, oos_method=oos_method) for treatment_variant in range(1, self.n_variants): + is_treatment = w == treatment_variant + is_control = w == 0 + mask = is_treatment | is_control + propensity_estimates = w_hat[:, treatment_variant] / ( w_hat[:, 0] + w_hat[:, treatment_variant] ) @@ -344,11 +348,11 @@ def evaluate( else tau_hat[:, treatment_variant - 1, 0] ) treatment_evaluation[f"r_loss_{treatment_variant}_vs_0"] = r_loss( - cate_estimates=cate_estimates, - outcome_estimates=y_hat, - propensity_scores=propensity_estimates, - outcomes=y, - treatments=w, + cate_estimates=cate_estimates[mask], + outcome_estimates=y_hat[mask], + propensity_scores=propensity_estimates[mask], + outcomes=y[mask], + treatments=w[mask] == treatment_variant, ) return propensity_evaluation | outcome_evaluation | treatment_evaluation From 963debfc22597fa32a6a8245a43c7a90f133675b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 14:43:05 +0200 Subject: [PATCH 05/32] Parametrize evaluate --- metalearners/_utils.py | 17 +++ metalearners/cross_fit_estimator.py | 9 +- metalearners/drlearner.py | 79 +++++++++++-- metalearners/metalearner.py | 84 +++++++++++++- metalearners/rlearner.py | 64 ++++++++--- metalearners/slearner.py | 35 ++++-- metalearners/tlearner.py | 43 ++++--- metalearners/xlearner.py | 78 ++++++++++++- tests/test_learner.py | 167 +++++++++++++++++++++++++--- 9 files changed, 503 insertions(+), 73 deletions(-) diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 0aca691..1d82b7b 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -463,3 +463,20 @@ def simplify_output_2d(tensor: np.ndarray) -> np.ndarray: "This function requires a regression or a classification with binary outcome " "task." 
) + + +# Taken from https://stackoverflow.com/questions/13741998/is-there-a-way-to-let-classes-inherit-the-documentation-of-their-superclass-with +def copydoc(fromfunc, sep="\n"): + """ + Decorator: Copy the docstring of `fromfunc` + """ + + def _decorator(func): + sourcedoc = fromfunc.__doc__ + if func.__doc__ is None: + func.__doc__ = sourcedoc + else: + func.__doc__ = sep.join([sourcedoc, func.__doc__]) + return func + + return _decorator diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py index 889f20c..0cccb8a 100644 --- a/metalearners/cross_fit_estimator.py +++ b/metalearners/cross_fit_estimator.py @@ -362,12 +362,17 @@ def __init__( self.original_predict_proba = model.predict_proba def __enter__(self): - self.model.predict = partial( # type: ignore + new_predict = partial( self.model.predict, is_oos=self.is_oos, oos_method=self.oos_method ) - self.model.predict_proba = partial( # type: ignore + new_predict.__name__ = "predict" # type: ignore + self.model.predict = new_predict # type: ignore + + new_predict_proba = partial( self.model.predict_proba, is_oos=self.is_oos, oos_method=self.oos_method ) + new_predict_proba.__name__ = "predict_proba" # type: ignore + self.model.predict_proba = new_predict_proba # type: ignore return self.model def __exit__(self, *args): diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index ea9b2f1..93b86a5 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -1,6 +1,8 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause +from collections.abc import Callable, Mapping + import numpy as np from joblib import Parallel, delayed from typing_extensions import Self @@ -23,6 +25,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, + _evaluate_model, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -148,6 +151,7 @@ def fit( w=w, y=y, treatment_variant=treatment_variant, + is_oos=False, ) treatment_jobs.append( @@ -205,37 +209,90 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - raise NotImplementedError( - "This feature is not yet implemented for the DR-Learner." 
+ scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + masks = [] + for tv in range(self.n_variants): + masks.append(w == tv) + variant_outcome_evaluation = _evaluate_model( + cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], + X=[X[w == tv] for tv in range(self.n_variants)], + y=[y[w == tv] for tv in range(self.n_variants)], + scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + model_kind=VARIANT_OUTCOME_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, + ) + + propensity_evaluation = _evaluate_model( + cfes=self._nuisance_models[PROPENSITY_MODEL], + X=[X], + y=[w], + scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + model_kind=PROPENSITY_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, ) + pseudo_outcome: list[np.ndarray] = [] + for treatment_variant in range(1, self.n_variants): + tv_pseudo_outcome = self._pseudo_outcome( + X=X, + y=y, + w=w, + treatment_variant=treatment_variant, + is_oos=is_oos, + oos_method=oos_method, + ) + pseudo_outcome.append(tv_pseudo_outcome) + + treatment_evaluation = _evaluate_model( + self._treatment_models[TREATMENT_MODEL], + X=[X for _ in range(1, self.n_variants)], + y=pseudo_outcome, + scorers=scoring.get(TREATMENT_MODEL, ["neg_root_mean_squared_error"]), + model_kind=TREATMENT_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=True, + ) + + return variant_outcome_evaluation | propensity_evaluation | treatment_evaluation + def _pseudo_outcome( self, X: Matrix, y: Vector, w: Vector, treatment_variant: int, + is_oos: bool, + oos_method: OosMethod = OVERALL, epsilon: float = _EPSILON, ) -> np.ndarray: - """Compute the DR-Learner pseudo outcome. - - Importantly, this method assumes to be applied on in-sample data. - In other words, ``is_oos`` will always be set to ``False`` when calling - ``predict_nuisance``. 
- """ + """Compute the DR-Learner pseudo outcome.""" validate_valid_treatment_variant_not_control(treatment_variant, self.n_variants) conditional_average_outcome_estimates = ( self.predict_conditional_average_outcomes( X=X, - is_oos=False, + is_oos=is_oos, + oos_method=oos_method, ) ) propensity_estimates = self.predict_nuisance( X=X, - is_oos=False, + is_oos=is_oos, + oos_method=oos_method, model_kind=PROPENSITY_MODEL, model_ord=0, ) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 945c791..90ccda8 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: BSD-3-Clause from abc import ABC, abstractmethod -from collections.abc import Callable, Collection +from collections.abc import Callable, Collection, Mapping, Sequence from copy import deepcopy from dataclasses import dataclass from typing import TypedDict @@ -10,6 +10,7 @@ import numpy as np import pandas as pd import shap +from sklearn.metrics import get_scorer from sklearn.model_selection import KFold from typing_extensions import Self @@ -32,6 +33,7 @@ from metalearners.cross_fit_estimator import ( OVERALL, CrossFitEstimator, + _PredictContext, ) from metalearners.explainer import Explainer @@ -133,6 +135,41 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: raise ValueError("Need at least two folds to use synchronization.") +def _evaluate_model( + cfes: Sequence[CrossFitEstimator], + X: Sequence[Matrix], + y: Sequence[Vector], + scorers: Sequence[str | Callable], + model_kind: str, + is_oos: bool, + is_treatment: bool, + oos_method: OosMethod = OVERALL, +) -> dict[str, float]: + """Helper function to evaluate all the models of the same model kind.""" + prefix = f"{model_kind}_" + evaluation_metrics: dict[str, float] = {} + for idx, scorer in enumerate(scorers): + if isinstance(scorer, str): + scorer_str = scorer + scorer_call: Callable = get_scorer(scorer) + else: + scorer_str = f"custom_scorer_{idx}" + scorer_call = scorer + for i, cfe in enumerate(cfes): + if is_treatment: + treatment_variant = i + 1 + index_str = f"{treatment_variant}_vs_0_" + else: + if len(cfes) == 1: + index_str = "" + else: + index_str = f"{i}_" + name = f"{prefix}{index_str}{scorer_str}" + with _PredictContext(cfe, is_oos, oos_method) as modified_cfe: + evaluation_metrics[name] = scorer_call(modified_cfe, X[i], y[i]) + return evaluation_metrics + + class _ModelSpecifications(TypedDict): # The quotes on MetaLearner are necessary for type hinting as it's not yet defined # here. Check https://stackoverflow.com/questions/55320236/does-python-evaluate-type-hinting-of-a-forward-reference @@ -311,6 +348,16 @@ def _validate_models(self) -> None: factory, predict_method, name=f"treatment model {model_kind}" ) + @classmethod + def _validate_scoring(cls, scoring: Mapping[str, list[str | Callable]]): + if not set(scoring.keys()) <= ( + set(cls.nuisance_model_specifications().keys()) + | set(cls.treatment_model_specifications().keys()) + ): + raise ValueError( + "scoring dict keys need to be a subset of the model names in the MetaLearner" + ) + def _qualified_fit_params( self, fit_params: None | dict, @@ -824,8 +871,39 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - """Evaluate all models contained in a MetaLearner.""" + scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + r"""Evaluate the models models contained in the MetaLearner. 
+ + ``scoring`` keys must be a subset of the names of the models contained in the + MetaLearner, for information about this names check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` + and :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`. + The values must be a list of: + + * ``string`` representing a ``sklearn`` scoring method. Check + `here `__ + for the possible values. + * ``Callable`` with signature ``scorer(estimator, X, y_true, **kwargs)``. We recommend + using `sklearn.metrics.make_scorer `_ + to create this callables. + + If some model name is not present in the keys of ``scoring`` then the default used + metrics will be ``neg_log_loss`` if it is a classifier and ``neg_root_mean_squared_error`` + if it is a regressor. + + The returned dictionary keys have the following structure: + + * For nuisance models: + + * If the cardinality is one: ``f"{model_kind}_{scorer}"`` + * If there is one model for each treatment variant (including control): + ``f"{model_kind}_{treatment_variant}_{scorer}"`` + + * For treatment models: ``f"{model_kind}_{treatment_variant}_vs_0_{scorer}"`` + + Where ``scorer`` is the name of the scorer if it is a string and ``"custom_scorer_{idx}"`` + if it is a callable where ``idx`` is the index in the ``scorers`` list. + """ ... def explainer( diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index 6a09847..349fbea 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -1,14 +1,17 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause +from collections.abc import Callable, Mapping + import numpy as np from joblib import Parallel, delayed -from sklearn.metrics import log_loss, root_mean_squared_error +from sklearn.metrics import root_mean_squared_error from typing_extensions import Self from metalearners._typing import Matrix, OosMethod, Vector from metalearners._utils import ( clip_element_absolute_value_to_epsilon, + copydoc, function_has_argument, get_one, get_predict, @@ -24,6 +27,7 @@ TREATMENT, TREATMENT_MODEL, MetaLearner, + _evaluate_model, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -323,6 +327,7 @@ def predict( tau_hat[variant_indices, treatment_variant - 1] = variant_estimates return tau_hat + @copydoc(MetaLearner.evaluate, sep="\n\t") def evaluate( self, X: Matrix, @@ -330,7 +335,41 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: + scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + """In the RLearner case, the ``"treatment_model"`` is always evaluated with the + :func:`~metalearners.rlearner.r_loss` and the ``scoring["treatment_model"]`` + parameter is ignored.""" + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + propensity_evaluation = _evaluate_model( + cfes=self._nuisance_models[PROPENSITY_MODEL], + X=[X], + y=[w], + scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + model_kind=PROPENSITY_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, + ) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + outcome_evaluation = _evaluate_model( + cfes=self._nuisance_models[OUTCOME_MODEL], + X=[X], + y=[y], + scorers=scoring.get(OUTCOME_MODEL, [default_metric]), + model_kind=OUTCOME_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, + ) + + # TODO: improve this? 
generalize it to other metalearners? w_hat = self.predict_nuisance( X=X, is_oos=is_oos, @@ -338,7 +377,6 @@ def evaluate( model_kind=PROPENSITY_MODEL, model_ord=0, ) - propensity_evaluation = {"propensity_cross_entropy": log_loss(w, w_hat)} y_hat = self.predict_nuisance( X=X, @@ -350,15 +388,13 @@ def evaluate( if self.is_classification: y_hat = y_hat[:, 1] - outcome_evaluation = ( - {"outcome_log_loss": log_loss(y, y_hat)} - if self.is_classification - else {"outcome_rmse": root_mean_squared_error(y, y_hat)} - ) - treatment_evaluation = {} tau_hat = self.predict(X=X, is_oos=is_oos, oos_method=oos_method) for treatment_variant in range(1, self.n_variants): + is_treatment = w == treatment_variant + is_control = w == 0 + mask = is_treatment | is_control + propensity_estimates = w_hat[:, treatment_variant] / ( w_hat[:, 0] + w_hat[:, treatment_variant] ) @@ -368,11 +404,11 @@ def evaluate( else tau_hat[:, treatment_variant - 1, 0] ) treatment_evaluation[f"r_loss_{treatment_variant}_vs_0"] = r_loss( - cate_estimates=cate_estimates, - outcome_estimates=y_hat, - propensity_scores=propensity_estimates, - outcomes=y, - treatments=w, + cate_estimates=cate_estimates[mask], + outcome_estimates=y_hat[mask], + propensity_scores=propensity_estimates[mask], + outcomes=y[mask], + treatments=w[mask] == treatment_variant, ) return propensity_evaluation | outcome_evaluation | treatment_evaluation diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 553b558..718464a 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -2,10 +2,10 @@ # # SPDX-License-Identifier: BSD-3-Clause import warnings +from collections.abc import Callable, Mapping import numpy as np import pandas as pd -from sklearn.metrics import log_loss, root_mean_squared_error from typing_extensions import Self from metalearners._typing import Matrix, OosMethod, Vector @@ -15,7 +15,12 @@ supports_categoricals, ) from metalearners.cross_fit_estimator import OVERALL -from metalearners.metalearner import NUISANCE, MetaLearner, _ModelSpecifications +from metalearners.metalearner import ( + NUISANCE, + MetaLearner, + _evaluate_model, + _ModelSpecifications, +) _BASE_MODEL = "base_model" @@ -150,17 +155,29 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - # TODO: Parameterize evaluation approaches. 
+ scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + X_with_w = _append_treatment_to_covariates( X, w, self._supports_categoricals, self.n_variants ) - y_pred = self.predict_nuisance( - X=X_with_w, model_kind=_BASE_MODEL, model_ord=0, is_oos=is_oos + return _evaluate_model( + cfes=self._nuisance_models[_BASE_MODEL], + X=[X_with_w], + y=[y], + scorers=scoring.get(_BASE_MODEL, [default_metric]), + model_kind=_BASE_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, ) - if self.is_classification: - return {"cross_entropy": log_loss(y, y_pred)} - return {"rmse": root_mean_squared_error(y, y_pred)} def predict_conditional_average_outcomes( self, X: Matrix, is_oos: bool, oos_method: OosMethod = OVERALL diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 9380144..9a5b8d9 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -2,9 +2,10 @@ # # SPDX-License-Identifier: BSD-3-Clause +from collections.abc import Callable, Mapping + import numpy as np from joblib import Parallel, delayed -from sklearn.metrics import log_loss, root_mean_squared_error from typing_extensions import Self from metalearners._typing import Matrix, OosMethod, Vector @@ -15,6 +16,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, + _evaluate_model, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -114,21 +116,26 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - # TODO: Parametrize evaluation approaches. 
- conditional_average_outcomes = self.predict_conditional_average_outcomes( - X=X, is_oos=is_oos, oos_method=oos_method + scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + + masks = [] + for tv in range(self.n_variants): + masks.append(w == tv) + return _evaluate_model( + cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], + X=[X[w == tv] for tv in range(self.n_variants)], + y=[y[w == tv] for tv in range(self.n_variants)], + scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + model_kind=VARIANT_OUTCOME_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, ) - evaluation_metrics = {} - for treatment_variant in range(self.n_variants): - prefix = f"variant_{treatment_variant}" - variant_outcomes = conditional_average_outcomes[:, treatment_variant] - if self.is_classification: - evaluation_metrics[f"{prefix}_cross_entropy"] = log_loss( - y[w == treatment_variant], variant_outcomes[w == treatment_variant] - ) - else: - evaluation_metrics[f"{prefix}_rmse"] = root_mean_squared_error( - y[w == treatment_variant], variant_outcomes[w == treatment_variant] - ) - return evaluation_metrics diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 729899c..d2b691d 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -1,6 +1,8 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause +from collections.abc import Callable, Mapping + import numpy as np from joblib import Parallel, delayed from typing_extensions import Self @@ -21,6 +23,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, + _evaluate_model, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -285,9 +288,78 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, float | int]: - raise NotImplementedError( - "This feature is not yet implemented for the X-Learner." 
+ scoring: Mapping[str, list[str | Callable]] | None = None, + ) -> dict[str, float]: + if scoring is None: + scoring = {} + self._validate_scoring(scoring=scoring) + + default_metric = ( + "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" + ) + masks = [] + for tv in range(self.n_variants): + masks.append(w == tv) + variant_outcome_evaluation = _evaluate_model( + cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], + X=[X[w == tv] for tv in range(self.n_variants)], + y=[y[w == tv] for tv in range(self.n_variants)], + scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + model_kind=VARIANT_OUTCOME_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, + ) + + propensity_evaluation = _evaluate_model( + cfes=self._nuisance_models[PROPENSITY_MODEL], + X=[X], + y=[w], + scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + model_kind=PROPENSITY_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=False, + ) + + imputed_te_control: list[np.ndarray] = [] + imputed_te_treatment: list[np.ndarray] = [] + for treatment_variant in range(1, self.n_variants): + tv_imputed_te_control, tv_imputed_te_treatment = self._pseudo_outcome( + X, y, w, treatment_variant + ) + imputed_te_control.append(tv_imputed_te_control) + imputed_te_treatment.append(tv_imputed_te_treatment) + + te_treatment_evaluation = _evaluate_model( + self._treatment_models[TREATMENT_EFFECT_MODEL], + X=[X[w == tv] for tv in range(1, self.n_variants)], + y=imputed_te_treatment, + scorers=scoring.get( + TREATMENT_EFFECT_MODEL, ["neg_root_mean_squared_error"] + ), + model_kind=TREATMENT_EFFECT_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=True, + ) + + te_control_evaluation = _evaluate_model( + self._treatment_models[CONTROL_EFFECT_MODEL], + X=[X[w == 0] for _ in range(1, self.n_variants)], + y=imputed_te_control, + scorers=scoring.get(CONTROL_EFFECT_MODEL, ["neg_root_mean_squared_error"]), + model_kind=CONTROL_EFFECT_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment=True, + ) + + return ( + variant_outcome_evaluation + | propensity_evaluation + | te_treatment_evaluation + | te_control_evaluation ) def _pseudo_outcome( diff --git a/tests/test_learner.py b/tests/test_learner.py index 30dce74..2afe3c6 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -6,7 +6,7 @@ import pytest from lightgbm import LGBMClassifier, LGBMRegressor from sklearn.linear_model import LinearRegression, LogisticRegression -from sklearn.metrics import root_mean_squared_error +from sklearn.metrics import make_scorer, root_mean_squared_error from sklearn.model_selection import train_test_split from metalearners.cross_fit_estimator import _OOS_WHITELIST @@ -310,11 +310,12 @@ def test_learner_twins(metalearner, reference_value, twins_data, rng): assert rmse < reference_value * (1 + _OOS_REFERENCE_VALUE_TOLERANCE) -@pytest.mark.parametrize("metalearner", ["S", "T", "R"]) +@pytest.mark.parametrize("metalearner", ["S", "T", "X", "R", "DR"]) @pytest.mark.parametrize("n_classes", [2, 5, 10]) @pytest.mark.parametrize("n_variants", [2, 5]) @pytest.mark.parametrize("is_classification", [True, False]) -def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_variants): +@pytest.mark.parametrize("is_oos", [True, False]) +def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_variants, is_oos): sample_size = 1000 factory = metalearner_factory(metalearner) if n_variants > 2 and not factory._supports_multi_treatment(): @@ -323,12 
+324,17 @@ def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_vari pytest.skip() # skip repeated tests if is_classification and n_classes > 2 and not factory._supports_multi_class(): pytest.skip() + test_size = 250 X = rng.standard_normal((sample_size, 10)) + X_test = rng.standard_normal((test_size, 10)) if is_oos else X w = rng.integers(0, n_variants, size=sample_size) + w_test = rng.integers(0, n_variants, test_size) if is_oos else w if is_classification: y = rng.integers(0, n_classes, size=sample_size) + y_test = rng.integers(0, n_classes, test_size) if is_oos else y else: y = rng.standard_normal(sample_size) + y_test = rng.standard_normal(test_size) if is_oos else y base_learner = _linear_base_learner(is_classification) @@ -341,28 +347,163 @@ def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_vari n_folds=2, ) learner.fit(X=X, y=y, w=w) - evaluation = learner.evaluate(X=X, y=y, w=w, is_oos=False) + evaluation = learner.evaluate(X=X_test, y=y_test, w=w_test, is_oos=is_oos) if is_classification: if metalearner == "S": - assert "cross_entropy" in evaluation - elif metalearner == "T": + assert set(evaluation.keys()) == {"base_model_neg_log_loss"} + elif metalearner in ["T", "X", "DR"]: for v in range(n_variants): - assert f"variant_{v}_cross_entropy" in evaluation + assert f"variant_outcome_model_{v}_neg_log_loss" in evaluation elif metalearner == "R": - assert "outcome_log_loss" in evaluation + assert "outcome_model_neg_log_loss" in evaluation else: if metalearner == "S": - assert "rmse" in evaluation - elif metalearner == "T": + assert set(evaluation.keys()) == {"base_model_neg_root_mean_squared_error"} + elif metalearner in ["T", "X", "DR"]: for v in range(n_variants): - assert f"variant_{v}_rmse" in evaluation + assert ( + f"variant_outcome_model_{v}_neg_root_mean_squared_error" + in evaluation + ) elif metalearner == "R": - assert "outcome_rmse" in evaluation + assert "outcome_model_neg_root_mean_squared_error" in evaluation if metalearner == "R": assert ( {f"r_loss_{i}_vs_0" for i in range(1, n_variants)} - | {"propensity_cross_entropy"} + | {"propensity_model_neg_log_loss"} ) <= set(evaluation.keys()) + elif metalearner == "X": + assert "propensity_model_neg_log_loss" in evaluation + for v in range(1, n_variants): + assert ( + f"treatment_effect_model_{v}_vs_0_neg_root_mean_squared_error" + in evaluation + ) + assert ( + f"control_effect_model_{v}_vs_0_neg_root_mean_squared_error" + in evaluation + ) + elif metalearner == "DR": + assert "propensity_model_neg_log_loss" in evaluation + for v in range(1, n_variants): + assert f"treatment_model_{v}_vs_0_neg_root_mean_squared_error" in evaluation + + +def new_score(estimator, X, y): + # This score doesn't make sense. + return np.mean(y - estimator.predict(X)) + + +def new_score_2(y, y_pred): + # This score doesn't make sense. 
+ return np.mean(y - y_pred) + + +@pytest.mark.parametrize( + "metalearner, is_classification, scoring, expected_keys", + [ + ("S", True, {"base_model": ["accuracy"]}, {"base_model_accuracy"}), + ("S", False, {"base_model": ["max_error"]}, {"base_model_max_error"}), + ( + "T", + False, + {"variant_outcome_model": [new_score, make_scorer(new_score_2)]}, + { + "variant_outcome_model_0_custom_scorer_0", + "variant_outcome_model_0_custom_scorer_1", + "variant_outcome_model_1_custom_scorer_0", + "variant_outcome_model_1_custom_scorer_1", + "variant_outcome_model_2_custom_scorer_0", + "variant_outcome_model_2_custom_scorer_1", + }, + ), + ( + "X", + True, + { + "variant_outcome_model": ["f1"], + "propensity_model": [], + "control_effect_model": [], + "treatment_effect_model": ["r2", new_score], + }, + { + "variant_outcome_model_0_f1", + "variant_outcome_model_1_f1", + "variant_outcome_model_2_f1", + "treatment_effect_model_1_vs_0_r2", + "treatment_effect_model_1_vs_0_custom_scorer_1", + "treatment_effect_model_2_vs_0_r2", + "treatment_effect_model_2_vs_0_custom_scorer_1", + }, + ), + ( + "R", + False, + { + "outcome_model": [make_scorer(new_score_2)], + "propensity_model": [], + }, + { + "outcome_model_custom_scorer_0", + "r_loss_1_vs_0", + "r_loss_2_vs_0", + }, + ), + ( + "DR", + True, + { + "variant_outcome_model": ["f1"], + "propensity_model": [], + "treatment_model": ["r2", new_score], + }, + { + "variant_outcome_model_0_f1", + "variant_outcome_model_1_f1", + "variant_outcome_model_2_f1", + "treatment_model_1_vs_0_r2", + "treatment_model_1_vs_0_custom_scorer_1", + "treatment_model_2_vs_0_r2", + "treatment_model_2_vs_0_custom_scorer_1", + }, + ), + ], +) +@pytest.mark.parametrize("is_oos", [True, False]) +def test_learner_evaluate_scoring( + metalearner, is_classification, scoring, expected_keys, is_oos, rng +): + factory = metalearner_factory(metalearner) + nuisance_model_factory = _linear_base_learner(is_classification) + nuisance_model_params = _linear_base_learner_params(is_classification) + + n_variants = 3 + sample_size = 1000 + test_size = 250 + X = rng.standard_normal((sample_size, 10)) + X_test = rng.standard_normal((test_size, 10)) if is_oos else X + w = rng.integers(0, n_variants, size=sample_size) + w_test = rng.integers(0, n_variants, test_size) if is_oos else w + if is_classification: + y = rng.integers(0, 2, size=sample_size) + y_test = rng.integers(0, 2, test_size) if is_oos else y + else: + y = rng.standard_normal(sample_size) + y_test = rng.standard_normal(test_size) if is_oos else y + + ml = factory( + is_classification=is_classification, + n_variants=n_variants, + nuisance_model_factory=nuisance_model_factory, + propensity_model_factory=LGBMClassifier, + treatment_model_factory=LinearRegression, + nuisance_model_params=nuisance_model_params, + propensity_model_params={"n_estimators": 1}, + n_folds=2, + ) + ml.fit(X, y, w) + evaluation = ml.evaluate(X_test, y_test, w_test, is_oos, scoring=scoring) + assert set(evaluation.keys()) == expected_keys @pytest.mark.parametrize("outcome_kind", ["binary", "continuous"]) From ad71c6645a8d0d1f9a2660af9df356ac2125f692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 14:49:41 +0200 Subject: [PATCH 06/32] run pchs --- tests/test_learner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_learner.py b/tests/test_learner.py index 2afe3c6..404fbeb 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -315,7 +315,9 @@ def 
test_learner_twins(metalearner, reference_value, twins_data, rng): @pytest.mark.parametrize("n_variants", [2, 5]) @pytest.mark.parametrize("is_classification", [True, False]) @pytest.mark.parametrize("is_oos", [True, False]) -def test_learner_evaluate(metalearner, is_classification, rng, n_classes, n_variants, is_oos): +def test_learner_evaluate( + metalearner, is_classification, rng, n_classes, n_variants, is_oos +): sample_size = 1000 factory = metalearner_factory(metalearner) if n_variants > 2 and not factory._supports_multi_treatment(): From e0a92397ffcae22900a53700f517cf92b12071a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Fri, 14 Jun 2024 15:13:27 +0200 Subject: [PATCH 07/32] Update CHANGELOG --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f5a59b2..2e7ee48 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,10 @@ Changelog 0.4.0 (2024-06-**) ------------------ +* Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and + implemented the abstract method for the :class:`metalearners.XLearner` and + :class:`metalearners.DRLearner`. + * Implemented :meth:`metalearners.cross_fit_estimator.CrossFitEstimator.clone`. * Added ``n_jobs_base_learners`` to :meth:`metalearners.metalearner.MetaLearner.fit`. From 476a4aedb0c82e7b42cf147d1365316aaddc82de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:07 +0200 Subject: [PATCH 08/32] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 22f53d1..3666689 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -135,7 +135,7 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: raise ValueError("Need at least two folds to use synchronization.") -def _evaluate_model( +def _evaluate_model_kind( cfes: Sequence[CrossFitEstimator], X: Sequence[Matrix], y: Sequence[Vector], From 1c4c060dcb2a3f54cbe89a22eeacc21c64d3769b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:13 +0200 Subject: [PATCH 09/32] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 3666689..091b1d4 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -137,7 +137,7 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: def _evaluate_model_kind( cfes: Sequence[CrossFitEstimator], - X: Sequence[Matrix], + Xs: Sequence[Matrix], y: Sequence[Vector], scorers: Sequence[str | Callable], model_kind: str, From 49f1556a7a7f5970aaadc1a6206021eca650d660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:19 +0200 Subject: [PATCH 10/32] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/metalearners/metalearner.py b/metalearners/metalearner.py index 091b1d4..513701c 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -138,7 +138,7 @@ def _validate_n_folds_synchronize(n_folds: dict[str, int]) -> None: def _evaluate_model_kind( cfes: Sequence[CrossFitEstimator], Xs: Sequence[Matrix], - y: Sequence[Vector], + ys: Sequence[Vector], scorers: Sequence[str | Callable], model_kind: str, is_oos: bool, From d5280459812b138b893e271fcc4869047570076e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:31 +0200 Subject: [PATCH 11/32] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 513701c..069f7e9 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -151,7 +151,7 @@ def _evaluate_model_kind( for idx, scorer in enumerate(scorers): if isinstance(scorer, str): scorer_str = scorer - scorer_call: Callable = get_scorer(scorer) + scorer_callable: Callable = get_scorer(scorer) else: scorer_str = f"custom_scorer_{idx}" scorer_call = scorer From 631505ebf5a6d851db82b7ca1137d5f15282ede0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:46:37 +0200 Subject: [PATCH 12/32] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 069f7e9..cc66c3d 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -150,7 +150,7 @@ def _evaluate_model_kind( evaluation_metrics: dict[str, float] = {} for idx, scorer in enumerate(scorers): if isinstance(scorer, str): - scorer_str = scorer + scorer_name = scorer scorer_callable: Callable = get_scorer(scorer) else: scorer_str = f"custom_scorer_{idx}" From e0e70fa49dc0a62aa4f66292fccc681662f3ecd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Mon, 24 Jun 2024 08:49:56 +0200 Subject: [PATCH 13/32] Fix naming --- metalearners/drlearner.py | 20 ++++++++++---------- metalearners/metalearner.py | 8 ++++---- metalearners/rlearner.py | 14 +++++++------- metalearners/slearner.py | 8 ++++---- metalearners/tlearner.py | 8 ++++---- metalearners/xlearner.py | 26 +++++++++++++------------- 6 files changed, 42 insertions(+), 42 deletions(-) diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 93b86a5..e50e3c6 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -25,7 +25,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, - _evaluate_model, + _evaluate_model_kind, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -221,10 +221,10 @@ def evaluate( masks = [] for tv in range(self.n_variants): masks.append(w == tv) - variant_outcome_evaluation = _evaluate_model( + variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], - X=[X[w == tv] for tv in range(self.n_variants)], - y=[y[w == tv] for tv in range(self.n_variants)], + Xs=[X[w == tv] for tv in range(self.n_variants)], + ys=[y[w == tv] for 
tv in range(self.n_variants)], scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, @@ -232,10 +232,10 @@ def evaluate( is_treatment=False, ) - propensity_evaluation = _evaluate_model( + propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], - X=[X], - y=[w], + Xs=[X], + ys=[w], scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), model_kind=PROPENSITY_MODEL, is_oos=is_oos, @@ -255,10 +255,10 @@ def evaluate( ) pseudo_outcome.append(tv_pseudo_outcome) - treatment_evaluation = _evaluate_model( + treatment_evaluation = _evaluate_model_kind( self._treatment_models[TREATMENT_MODEL], - X=[X for _ in range(1, self.n_variants)], - y=pseudo_outcome, + Xs=[X for _ in range(1, self.n_variants)], + ys=pseudo_outcome, scorers=scoring.get(TREATMENT_MODEL, ["neg_root_mean_squared_error"]), model_kind=TREATMENT_MODEL, is_oos=is_oos, diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index cc66c3d..596a5ad 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -153,8 +153,8 @@ def _evaluate_model_kind( scorer_name = scorer scorer_callable: Callable = get_scorer(scorer) else: - scorer_str = f"custom_scorer_{idx}" - scorer_call = scorer + scorer_name = f"custom_scorer_{idx}" + scorer_callable = scorer for i, cfe in enumerate(cfes): if is_treatment: treatment_variant = i + 1 @@ -164,9 +164,9 @@ def _evaluate_model_kind( index_str = "" else: index_str = f"{i}_" - name = f"{prefix}{index_str}{scorer_str}" + name = f"{prefix}{index_str}{scorer_name}" with _PredictContext(cfe, is_oos, oos_method) as modified_cfe: - evaluation_metrics[name] = scorer_call(modified_cfe, X[i], y[i]) + evaluation_metrics[name] = scorer_callable(modified_cfe, Xs[i], ys[i]) return evaluation_metrics diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index b1e3525..d017a38 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -27,7 +27,7 @@ TREATMENT, TREATMENT_MODEL, MetaLearner, - _evaluate_model, + _evaluate_model_kind, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -344,10 +344,10 @@ def evaluate( scoring = {} self._validate_scoring(scoring=scoring) - propensity_evaluation = _evaluate_model( + propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], - X=[X], - y=[w], + Xs=[X], + ys=[w], scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), model_kind=PROPENSITY_MODEL, is_oos=is_oos, @@ -358,10 +358,10 @@ def evaluate( default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" ) - outcome_evaluation = _evaluate_model( + outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[OUTCOME_MODEL], - X=[X], - y=[y], + Xs=[X], + ys=[y], scorers=scoring.get(OUTCOME_MODEL, [default_metric]), model_kind=OUTCOME_MODEL, is_oos=is_oos, diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 718464a..baa5d6a 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -18,7 +18,7 @@ from metalearners.metalearner import ( NUISANCE, MetaLearner, - _evaluate_model, + _evaluate_model_kind, _ModelSpecifications, ) @@ -168,10 +168,10 @@ def evaluate( X_with_w = _append_treatment_to_covariates( X, w, self._supports_categoricals, self.n_variants ) - return _evaluate_model( + return _evaluate_model_kind( cfes=self._nuisance_models[_BASE_MODEL], - X=[X_with_w], - y=[y], + Xs=[X_with_w], + ys=[y], scorers=scoring.get(_BASE_MODEL, 
[default_metric]), model_kind=_BASE_MODEL, is_oos=is_oos, diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 9a5b8d9..befcc7f 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -16,7 +16,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, - _evaluate_model, + _evaluate_model_kind, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -129,10 +129,10 @@ def evaluate( masks = [] for tv in range(self.n_variants): masks.append(w == tv) - return _evaluate_model( + return _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], - X=[X[w == tv] for tv in range(self.n_variants)], - y=[y[w == tv] for tv in range(self.n_variants)], + Xs=[X[w == tv] for tv in range(self.n_variants)], + ys=[y[w == tv] for tv in range(self.n_variants)], scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index d2b691d..434d1fd 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -23,7 +23,7 @@ VARIANT_OUTCOME_MODEL, MetaLearner, _ConditionalAverageOutcomeMetaLearner, - _evaluate_model, + _evaluate_model_kind, _fit_cross_fit_estimator_joblib, _ModelSpecifications, _ParallelJoblibSpecification, @@ -300,10 +300,10 @@ def evaluate( masks = [] for tv in range(self.n_variants): masks.append(w == tv) - variant_outcome_evaluation = _evaluate_model( + variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], - X=[X[w == tv] for tv in range(self.n_variants)], - y=[y[w == tv] for tv in range(self.n_variants)], + Xs=[X[w == tv] for tv in range(self.n_variants)], + ys=[y[w == tv] for tv in range(self.n_variants)], scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, @@ -311,10 +311,10 @@ def evaluate( is_treatment=False, ) - propensity_evaluation = _evaluate_model( + propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], - X=[X], - y=[w], + Xs=[X], + ys=[w], scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), model_kind=PROPENSITY_MODEL, is_oos=is_oos, @@ -331,10 +331,10 @@ def evaluate( imputed_te_control.append(tv_imputed_te_control) imputed_te_treatment.append(tv_imputed_te_treatment) - te_treatment_evaluation = _evaluate_model( + te_treatment_evaluation = _evaluate_model_kind( self._treatment_models[TREATMENT_EFFECT_MODEL], - X=[X[w == tv] for tv in range(1, self.n_variants)], - y=imputed_te_treatment, + Xs=[X[w == tv] for tv in range(1, self.n_variants)], + ys=imputed_te_treatment, scorers=scoring.get( TREATMENT_EFFECT_MODEL, ["neg_root_mean_squared_error"] ), @@ -344,10 +344,10 @@ def evaluate( is_treatment=True, ) - te_control_evaluation = _evaluate_model( + te_control_evaluation = _evaluate_model_kind( self._treatment_models[CONTROL_EFFECT_MODEL], - X=[X[w == 0] for _ in range(1, self.n_variants)], - y=imputed_te_control, + Xs=[X[w == 0] for _ in range(1, self.n_variants)], + ys=imputed_te_control, scorers=scoring.get(CONTROL_EFFECT_MODEL, ["neg_root_mean_squared_error"]), model_kind=CONTROL_EFFECT_MODEL, is_oos=is_oos, From e0cd5638cc0e81a24addf5b148bf060cc59f87fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:56:43 +0200 Subject: [PATCH 14/32] Update metalearners/metalearner.py Co-authored-by: Kevin Klein 
<7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 596a5ad..e1b6bab 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -873,7 +873,7 @@ def evaluate( oos_method: OosMethod = OVERALL, scoring: Mapping[str, list[str | Callable]] | None = None, ) -> dict[str, float]: - r"""Evaluate the models models contained in the MetaLearner. + r"""Evaluate the the MetaLearner. ``scoring`` keys must be a subset of the names of the models contained in the MetaLearner, for information about this names check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` From fc01491987cf0df4d33ea76dba6aa4cba5dba92a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Mon, 24 Jun 2024 08:57:07 +0200 Subject: [PATCH 15/32] Fix docs --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index e1b6bab..5be35cc 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -873,7 +873,7 @@ def evaluate( oos_method: OosMethod = OVERALL, scoring: Mapping[str, list[str | Callable]] | None = None, ) -> dict[str, float]: - r"""Evaluate the the MetaLearner. + r"""Evaluate the MetaLearner. ``scoring`` keys must be a subset of the names of the models contained in the MetaLearner, for information about this names check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` From 01501067b288760b5db9b2857699671541a54a61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Mon, 24 Jun 2024 09:13:10 +0200 Subject: [PATCH 16/32] Don't force subset --- metalearners/drlearner.py | 1 - metalearners/metalearner.py | 17 ++++------------- metalearners/rlearner.py | 1 - metalearners/slearner.py | 1 - metalearners/tlearner.py | 1 - metalearners/xlearner.py | 1 - 6 files changed, 4 insertions(+), 18 deletions(-) diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index e50e3c6..8d802e3 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -213,7 +213,6 @@ def evaluate( ) -> dict[str, float]: if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 5be35cc..76a3716 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -348,16 +348,6 @@ def _validate_models(self) -> None: factory, predict_method, name=f"treatment model {model_kind}" ) - @classmethod - def _validate_scoring(cls, scoring: Mapping[str, list[str | Callable]]): - if not set(scoring.keys()) <= ( - set(cls.nuisance_model_specifications().keys()) - | set(cls.treatment_model_specifications().keys()) - ): - raise ValueError( - "scoring dict keys need to be a subset of the model names in the MetaLearner" - ) - def _qualified_fit_params( self, fit_params: None | dict, @@ -875,9 +865,10 @@ def evaluate( ) -> dict[str, float]: r"""Evaluate the MetaLearner. - ``scoring`` keys must be a subset of the names of the models contained in the - MetaLearner, for information about this names check :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` - and :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`. 
+ The keys in ``scoring`` which are not a name of a model contained in the MetaLearner + will be ignored, for information about this names check + :meth:`~metalearners.metalearner.MetaLearner.nuisance_model_specifications` and + :meth:`~metalearners.metalearner.MetaLearner.treatment_model_specifications`. The values must be a list of: * ``string`` representing a ``sklearn`` scoring method. Check diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index d017a38..b4824ba 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -342,7 +342,6 @@ def evaluate( parameter is ignored.""" if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], diff --git a/metalearners/slearner.py b/metalearners/slearner.py index baa5d6a..a81b211 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -159,7 +159,6 @@ def evaluate( ) -> dict[str, float]: if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index befcc7f..a82f3da 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -120,7 +120,6 @@ def evaluate( ) -> dict[str, float]: if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 434d1fd..8103ed9 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -292,7 +292,6 @@ def evaluate( ) -> dict[str, float]: if scoring is None: scoring = {} - self._validate_scoring(scoring=scoring) default_metric = ( "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" From 6b595bdc60f934813fa485c953a3ae58d907c0b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Mon, 24 Jun 2024 09:14:50 +0200 Subject: [PATCH 17/32] Add test to ignore --- tests/test_learner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_learner.py b/tests/test_learner.py index 404fbeb..ca2823c 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -409,7 +409,10 @@ def new_score_2(y, y_pred): ( "T", False, - {"variant_outcome_model": [new_score, make_scorer(new_score_2)]}, + { + "variant_outcome_model": [new_score, make_scorer(new_score_2)], + "to_ignore": [], + }, { "variant_outcome_model_0_custom_scorer_0", "variant_outcome_model_0_custom_scorer_1", From 19f895c493bbfc8baa2defdfc9d49bffdce0271a Mon Sep 17 00:00:00 2001 From: Kevin Klein <7267523+kklein@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:37:45 +0200 Subject: [PATCH 18/32] Centralize generation of default scoring (#22) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Centralize generation of default scoring. * Reuse more type hints. * Update metalearners/metalearner.py Co-authored-by: Francesc Martí Escofet <154450563+FrancescMartiEscofetQC@users.noreply.github.com> * Update metalearners/metalearner.py Co-authored-by: Francesc Martí Escofet <154450563+FrancescMartiEscofetQC@users.noreply.github.com> * Apply pchs. 
--------- Co-authored-by: Francesc Martí Escofet <154450563+FrancescMartiEscofetQC@users.noreply.github.com> --- metalearners/_typing.py | 7 +++++-- metalearners/_utils.py | 6 ++++++ metalearners/drlearner.py | 17 ++++++----------- metalearners/metalearner.py | 23 +++++++++++++++++++++++ metalearners/rlearner.py | 15 +++++---------- metalearners/slearner.py | 14 ++++---------- metalearners/tlearner.py | 16 +++++----------- metalearners/xlearner.py | 22 ++++++++-------------- 8 files changed, 62 insertions(+), 58 deletions(-) diff --git a/metalearners/_typing.py b/metalearners/_typing.py index a7f39d4..b937317 100644 --- a/metalearners/_typing.py +++ b/metalearners/_typing.py @@ -1,7 +1,7 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Collection, Mapping +from collections.abc import Callable, Collection, Mapping, Sequence from typing import Literal, Protocol, Union import numpy as np @@ -29,7 +29,6 @@ class _ScikitModel(Protocol): # https://stackoverflow.com/questions/54868698/what-type-is-a-sklearn-model/60542986#60542986 def fit(self, X, y, *params, **kwargs): ... - def predict(self, X, *params, **kwargs): ... def score(self, X, y, **kwargs): ... @@ -44,3 +43,7 @@ def set_params(self, **params): ... # For instance, if converting the Generator resulting from a call to # sklearn.model_selection.KFold.split to a list we obtain this type. SplitIndices = list[tuple[np.ndarray, np.ndarray]] + +Scorer = str | Callable +Scorers = Sequence[Scorer] +Scoring = Mapping[str, Scorers] diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 1d82b7b..095c2f8 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -480,3 +480,9 @@ def _decorator(func): return func return _decorator + + +def default_metric(predict_method: PredictMethod) -> str: + if predict_method == _PREDICT_PROBA: + return "neg_log_loss" + return "neg_root_mean_squared_error" diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 8d802e3..ca5dd1a 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -1,13 +1,12 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Callable, Mapping import numpy as np from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import ( clip_element_absolute_value_to_epsilon, get_one, @@ -209,14 +208,10 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: - if scoring is None: - scoring = {} + safe_scoring = self._scoring(scoring) - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) masks = [] for tv in range(self.n_variants): masks.append(w == tv) @@ -224,7 +219,7 @@ def evaluate( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], ys=[y[w == tv] for tv in range(self.n_variants)], - scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + scorers=safe_scoring[VARIANT_OUTCOME_MODEL], model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -235,7 +230,7 @@ def evaluate( cfes=self._nuisance_models[PROPENSITY_MODEL], Xs=[X], ys=[w], - scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + 
scorers=safe_scoring[PROPENSITY_MODEL], model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -258,7 +253,7 @@ def evaluate( self._treatment_models[TREATMENT_MODEL], Xs=[X for _ in range(1, self.n_variants)], ys=pseudo_outcome, - scorers=scoring.get(TREATMENT_MODEL, ["neg_root_mean_squared_error"]), + scorers=safe_scoring[TREATMENT_MODEL], model_kind=TREATMENT_MODEL, is_oos=is_oos, oos_method=oos_method, diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 914b62b..2fcdcdf 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -21,11 +21,13 @@ OosMethod, Params, PredictMethod, + Scoring, SplitIndices, Vector, _ScikitModel, ) from metalearners._utils import ( + default_metric, index_matrix, validate_model_and_predict_method, validate_number_positive, @@ -1025,6 +1027,27 @@ def shap_values( shap_explainer_params=shap_explainer_params, ) + def _scoring(self, scoring: Scoring | None) -> Scoring: + + def _default_scoring() -> Scoring: + return { + nuisance_model: [ + default_metric(model_specifications["predict_method"](self)) + ] + for nuisance_model, model_specifications in self.nuisance_model_specifications().items() + } | { + treatment_model: [ + default_metric(model_specifications["predict_method"](self)) + ] + for treatment_model, model_specifications in self.treatment_model_specifications().items() + } + + default_scoring = _default_scoring() + + if scoring is None: + return default_scoring + return dict(default_scoring) | dict(scoring) + class _ConditionalAverageOutcomeMetaLearner(MetaLearner, ABC): diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index b4824ba..49258bd 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -1,14 +1,13 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Callable, Mapping import numpy as np from joblib import Parallel, delayed from sklearn.metrics import root_mean_squared_error from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import ( clip_element_absolute_value_to_epsilon, copydoc, @@ -335,33 +334,29 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: """In the RLearner case, the ``"treatment_model"`` is always evaluated with the :func:`~metalearners.rlearner.r_loss` and the ``scoring["treatment_model"]`` parameter is ignored.""" - if scoring is None: - scoring = {} + safe_scoring = self._scoring(scoring) propensity_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[PROPENSITY_MODEL], Xs=[X], ys=[w], - scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + scorers=safe_scoring[PROPENSITY_MODEL], model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, is_treatment=False, ) - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[OUTCOME_MODEL], Xs=[X], ys=[y], - scorers=scoring.get(OUTCOME_MODEL, [default_metric]), + scorers=safe_scoring[OUTCOME_MODEL], model_kind=OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 0d50b12..aacc93b 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -2,13 +2,12 @@ 
# # SPDX-License-Identifier: BSD-3-Clause import warnings -from collections.abc import Callable, Mapping import numpy as np import pandas as pd from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import ( convert_treatment, get_one, @@ -158,14 +157,9 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: - if scoring is None: - scoring = {} - - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) + safe_scoring = self._scoring(scoring) X_with_w = _append_treatment_to_covariates( X, w, self._supports_categoricals, self.n_variants @@ -174,7 +168,7 @@ def evaluate( cfes=self._nuisance_models[_BASE_MODEL], Xs=[X_with_w], ys=[y], - scorers=scoring.get(_BASE_MODEL, [default_metric]), + scorers=safe_scoring[_BASE_MODEL], model_kind=_BASE_MODEL, is_oos=is_oos, oos_method=oos_method, diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index a82f3da..24275fc 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -2,13 +2,11 @@ # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Callable, Mapping - import numpy as np from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import index_matrix from metalearners.cross_fit_estimator import OVERALL from metalearners.metalearner import ( @@ -116,23 +114,19 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: - if scoring is None: - scoring = {} - - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) + safe_scoring = self._scoring(scoring) masks = [] for tv in range(self.n_variants): masks.append(w == tv) + return _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], ys=[y[w == tv] for tv in range(self.n_variants)], - scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + scorers=safe_scoring[VARIANT_OUTCOME_MODEL], model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 8103ed9..611bc87 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -1,13 +1,12 @@ # # Copyright (c) QuantCo 2024-2024 # # SPDX-License-Identifier: BSD-3-Clause -from collections.abc import Callable, Mapping import numpy as np from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Vector +from metalearners._typing import Matrix, OosMethod, Scoring, Vector from metalearners._utils import ( get_one, get_predict, @@ -288,22 +287,19 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - scoring: Mapping[str, list[str | Callable]] | None = None, + scoring: Scoring | None = None, ) -> dict[str, float]: - if scoring is None: - scoring = {} + safe_scoring = self._scoring(scoring) - default_metric = ( - "neg_log_loss" if self.is_classification else "neg_root_mean_squared_error" - ) masks = [] for tv in 
range(self.n_variants): masks.append(w == tv) + variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], ys=[y[w == tv] for tv in range(self.n_variants)], - scorers=scoring.get(VARIANT_OUTCOME_MODEL, [default_metric]), + scorers=safe_scoring[VARIANT_OUTCOME_MODEL], model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -314,7 +310,7 @@ def evaluate( cfes=self._nuisance_models[PROPENSITY_MODEL], Xs=[X], ys=[w], - scorers=scoring.get(PROPENSITY_MODEL, ["neg_log_loss"]), + scorers=safe_scoring[PROPENSITY_MODEL], model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -334,9 +330,7 @@ def evaluate( self._treatment_models[TREATMENT_EFFECT_MODEL], Xs=[X[w == tv] for tv in range(1, self.n_variants)], ys=imputed_te_treatment, - scorers=scoring.get( - TREATMENT_EFFECT_MODEL, ["neg_root_mean_squared_error"] - ), + scorers=safe_scoring[TREATMENT_EFFECT_MODEL], model_kind=TREATMENT_EFFECT_MODEL, is_oos=is_oos, oos_method=oos_method, @@ -347,7 +341,7 @@ def evaluate( self._treatment_models[CONTROL_EFFECT_MODEL], Xs=[X[w == 0] for _ in range(1, self.n_variants)], ys=imputed_te_control, - scorers=scoring.get(CONTROL_EFFECT_MODEL, ["neg_root_mean_squared_error"]), + scorers=safe_scoring[CONTROL_EFFECT_MODEL], model_kind=CONTROL_EFFECT_MODEL, is_oos=is_oos, oos_method=oos_method, From 12d41b5f389c989c74ac7793522262ba93510406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:27:58 +0200 Subject: [PATCH 19/32] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 2fcdcdf..ea5dcc5 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -879,7 +879,7 @@ def evaluate( for the possible values. * ``Callable`` with signature ``scorer(estimator, X, y_true, **kwargs)``. We recommend using `sklearn.metrics.make_scorer `_ - to create this callables. + to create such a ``Callable``. 
If some model name is not present in the keys of ``scoring`` then the default used metrics will be ``neg_log_loss`` if it is a classifier and ``neg_root_mean_squared_error`` From 4a36e2513cbb23c55dd66de5d1ca0277cf958d1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:29:08 +0200 Subject: [PATCH 20/32] Update metalearners/tlearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/tlearner.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 24275fc..875a986 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -118,9 +118,6 @@ def evaluate( ) -> dict[str, float]: safe_scoring = self._scoring(scoring) - masks = [] - for tv in range(self.n_variants): - masks.append(w == tv) return _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], From 5f0987f1492762a3dfde0e6d1613f110bb618fbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:29:27 +0200 Subject: [PATCH 21/32] Update metalearners/xlearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/xlearner.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 611bc87..319a7ad 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -291,10 +291,6 @@ def evaluate( ) -> dict[str, float]: safe_scoring = self._scoring(scoring) - masks = [] - for tv in range(self.n_variants): - masks.append(w == tv) - variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], From d76dc7447ff3d20e9b473ae40e53385b4c30a8ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:31:39 +0200 Subject: [PATCH 22/32] Update metalearners/metalearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index ea5dcc5..6c1604a 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -144,7 +144,7 @@ def _evaluate_model_kind( scorers: Sequence[str | Callable], model_kind: str, is_oos: bool, - is_treatment: bool, + is_treatment_model: bool, oos_method: OosMethod = OVERALL, ) -> dict[str, float]: """Helper function to evaluate all the models of the same model kind.""" From 05787f9d1652d34b9bd32bd9d16646bb547f143b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:32:32 +0200 Subject: [PATCH 23/32] Rename --- metalearners/metalearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 6c1604a..42c4377 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -158,7 +158,7 @@ def _evaluate_model_kind( scorer_name = f"custom_scorer_{idx}" scorer_callable = scorer for i, cfe in enumerate(cfes): - if is_treatment: + if is_treatment_model: treatment_variant = i + 1 index_str = f"{treatment_variant}_vs_0_" else: From 
dc946dc0ee042ea1805b1f6d1c4fbb199d6cd33a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:35:05 +0200 Subject: [PATCH 24/32] Rename --- metalearners/drlearner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index ca5dd1a..40e8a09 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -223,7 +223,7 @@ def evaluate( model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) propensity_evaluation = _evaluate_model_kind( @@ -234,7 +234,7 @@ def evaluate( model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) pseudo_outcome: list[np.ndarray] = [] @@ -257,7 +257,7 @@ def evaluate( model_kind=TREATMENT_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=True, + is_treatment_model=True, ) return variant_outcome_evaluation | propensity_evaluation | treatment_evaluation From ba895a350f00c66670aeb86030519cb609326f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:35:38 +0200 Subject: [PATCH 25/32] Rename --- metalearners/rlearner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index 49258bd..90b4e1f 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -349,7 +349,7 @@ def evaluate( model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) outcome_evaluation = _evaluate_model_kind( @@ -360,7 +360,7 @@ def evaluate( model_kind=OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) # TODO: improve this? generalize it to other metalearners? 
From e81d152172c3a1d92824f83ea4467f3defc937ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:35:58 +0200 Subject: [PATCH 26/32] Rename --- metalearners/slearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/slearner.py b/metalearners/slearner.py index aacc93b..4e3e2b7 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -172,7 +172,7 @@ def evaluate( model_kind=_BASE_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) def predict_conditional_average_outcomes( From 9d2bbb9b1575ca99a2c05fce053ca989c2c75393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:36:17 +0200 Subject: [PATCH 27/32] Rename --- metalearners/tlearner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 875a986..11dacaa 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -118,7 +118,6 @@ def evaluate( ) -> dict[str, float]: safe_scoring = self._scoring(scoring) - return _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], @@ -127,5 +126,5 @@ def evaluate( model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) From c4de4f17b26b021860aa7fc62f77577a58c67272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:36:47 +0200 Subject: [PATCH 28/32] Rename --- metalearners/xlearner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 319a7ad..839ba57 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -299,7 +299,7 @@ def evaluate( model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) propensity_evaluation = _evaluate_model_kind( @@ -310,7 +310,7 @@ def evaluate( model_kind=PROPENSITY_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=False, + is_treatment_model=False, ) imputed_te_control: list[np.ndarray] = [] @@ -330,7 +330,7 @@ def evaluate( model_kind=TREATMENT_EFFECT_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=True, + is_treatment_model=True, ) te_control_evaluation = _evaluate_model_kind( @@ -341,7 +341,7 @@ def evaluate( model_kind=CONTROL_EFFECT_MODEL, is_oos=is_oos, oos_method=oos_method, - is_treatment=True, + is_treatment_model=True, ) return ( From 7fa87940285151fc88b4fd4def4dced461d5b14d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:40:05 +0200 Subject: [PATCH 29/32] Update metalearners/drlearner.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/drlearner.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 40e8a09..1df7291 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -212,9 +212,6 @@ def evaluate( ) -> dict[str, float]: safe_scoring = self._scoring(scoring) - masks = [] - for tv in range(self.n_variants): - masks.append(w == 
tv) variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], Xs=[X[w == tv] for tv in range(self.n_variants)], From 8691a02f65484980c76cbe64e1a2c6537e4c5199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= <154450563+FrancescMartiEscofetQC@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:40:12 +0200 Subject: [PATCH 30/32] Update metalearners/_utils.py Co-authored-by: Kevin Klein <7267523+kklein@users.noreply.github.com> --- metalearners/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 095c2f8..cf9fde1 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -468,7 +468,7 @@ def simplify_output_2d(tensor: np.ndarray) -> np.ndarray: # Taken from https://stackoverflow.com/questions/13741998/is-there-a-way-to-let-classes-inherit-the-documentation-of-their-superclass-with def copydoc(fromfunc, sep="\n"): """ - Decorator: Copy the docstring of `fromfunc` + Decorator: Copy the docstring of ``fromfunc`` """ def _decorator(func): From d38e9d52dad37322b4eef9fac03a61927ba2f666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Tue, 25 Jun 2024 17:08:56 +0200 Subject: [PATCH 31/32] Update CHANGELOG --- CHANGELOG.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 42ec926..4526f78 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,13 @@ Changelog ========= +0.6.0 (2024-06-**) +------------------ + +* Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and + implemented the abstract method for the :class:`metalearners.XLearner` and + :class:`metalearners.DRLearner`. + 0.5.0 (2024-06-18) ------------------ @@ -30,10 +37,6 @@ Changelog 0.4.0 (2024-06-18) ------------------ -* Added ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and - implemented the abstract method for the :class:`metalearners.XLearner` and - :class:`metalearners.DRLearner`. - * Implemented :meth:`metalearners.cross_fit_estimator.CrossFitEstimator.clone`. * Added ``n_jobs_base_learners`` to :meth:`metalearners.metalearner.MetaLearner.fit`. 
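Note (usage sketch, not part of the applied patches): with the centralized ``_scoring`` defaults introduced above, ``evaluate`` accepts a per-model ``scoring`` mapping whose keys are model kinds and whose values are lists of sklearn scorer names or ``make_scorer`` callables. The snippet below assumes an already fitted ``DRLearner`` named ``learner`` and held-out data ``X_test``/``y_test``/``w_test``; those names, the custom metric and the fitting step are illustrative assumptions, while the ``scoring`` semantics and the returned key format follow the docstrings and tests in these patches.

```python
import numpy as np
from sklearn.metrics import make_scorer


def mean_absolute_deviation(y_true, y_pred):
    # Hypothetical custom metric; make_scorer turns a (y_true, y_pred) function
    # into a scorer(estimator, X, y) callable, as recommended in the docstring.
    return float(np.mean(np.abs(y_true - y_pred)))


# `learner` is assumed to be a fitted metalearners.DRLearner.
evaluation = learner.evaluate(
    X=X_test,
    y=y_test,
    w=w_test,
    is_oos=True,
    scoring={
        # Keys are model kinds; kinds not listed here fall back to the defaults
        # (neg_log_loss for classifiers, neg_root_mean_squared_error otherwise).
        "variant_outcome_model": [
            "neg_mean_absolute_error",
            make_scorer(mean_absolute_deviation, greater_is_better=False),
        ],
        "propensity_model": ["neg_log_loss"],
        # Keys that do not correspond to a model in the MetaLearner are ignored.
        "not_a_model": [],
    },
)
# Returned keys look like "variant_outcome_model_0_neg_mean_absolute_error",
# "variant_outcome_model_0_custom_scorer_1", "propensity_model_neg_log_loss",
# "treatment_model_1_vs_0_neg_root_mean_squared_error", ...
print(evaluation)
```
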
From c20ae75321c972763c177bf5051f276336eb8924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francesc=20Mart=C3=AD=20Escofet?= Date: Wed, 26 Jun 2024 17:12:55 +0200 Subject: [PATCH 32/32] Add option to evaluate treatment model in RLearner --- metalearners/metalearner.py | 10 +++++- metalearners/rlearner.py | 65 ++++++++++++++++++++++++++++++------- tests/test_learner.py | 7 ++++ 3 files changed, 69 insertions(+), 13 deletions(-) diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 6a3c260..4bce635 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -146,6 +146,7 @@ def _evaluate_model_kind( is_oos: bool, is_treatment_model: bool, oos_method: OosMethod = OVERALL, + sample_weights: Sequence[Vector] | None = None, ) -> dict[str, float]: """Helper function to evaluate all the models of the same model kind.""" prefix = f"{model_kind}_" @@ -168,7 +169,14 @@ def _evaluate_model_kind( index_str = f"{i}_" name = f"{prefix}{index_str}{scorer_name}" with _PredictContext(cfe, is_oos, oos_method) as modified_cfe: - evaluation_metrics[name] = scorer_callable(modified_cfe, Xs[i], ys[i]) + if sample_weights: + evaluation_metrics[name] = scorer_callable( + modified_cfe, Xs[i], ys[i], sample_weight=sample_weights[i] + ) + else: + evaluation_metrics[name] = scorer_callable( + modified_cfe, Xs[i], ys[i] + ) return evaluation_metrics diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index aec5917..ee6e45d 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -230,6 +230,7 @@ def fit( treatment_variant=treatment_variant, mask=mask, epsilon=epsilon, + is_oos=False, ) X_filtered = index_matrix(X, mask) @@ -337,8 +338,9 @@ def evaluate( scoring: Scoring | None = None, ) -> dict[str, float]: """In the RLearner case, the ``"treatment_model"`` is always evaluated with the - :func:`~metalearners.rlearner.r_loss` and the ``scoring["treatment_model"]`` - parameter is ignored.""" + :func:`~metalearners.rlearner.r_loss` besides the scorers in + ``scoring["treatment_model"]``, which should support passing the + ``sample_weight`` keyword argument.""" safe_scoring = self._scoring(scoring) propensity_evaluation = _evaluate_model_kind( @@ -382,7 +384,39 @@ def evaluate( if self.is_classification: y_hat = y_hat[:, 1] - treatment_evaluation = {} + pseudo_outcome: list[np.ndarray] = [] + sample_weights: list[np.ndarray] = [] + masks: list[Vector] = [] + is_control = w == 0 + for treatment_variant in range(1, self.n_variants): + is_treatment = w == treatment_variant + mask = is_treatment | is_control + tv_pseudo_outcome, tv_sample_weights = self._pseudo_outcome_and_weights( + X=X, + y=y, + w=w, + treatment_variant=treatment_variant, + is_oos=is_oos, + oos_method=oos_method, + mask=mask, + ) + pseudo_outcome.append(tv_pseudo_outcome) + sample_weights.append(tv_sample_weights) + masks.append(mask) + + treatment_evaluation = _evaluate_model_kind( + self._treatment_models[TREATMENT_MODEL], + Xs=[X[masks[tv - 1]] for tv in range(1, self.n_variants)], + ys=pseudo_outcome, + scorers=safe_scoring[TREATMENT_MODEL], + model_kind=TREATMENT_MODEL, + is_oos=is_oos, + oos_method=oos_method, + is_treatment_model=True, + sample_weights=sample_weights, + ) + + rloss_evaluation = {} tau_hat = self.predict(X=X, is_oos=is_oos, oos_method=oos_method) is_control = w == 0 for treatment_variant in range(1, self.n_variants): @@ -397,15 +431,19 @@ def evaluate( if self.is_classification else tau_hat[:, treatment_variant - 1, 0] ) - 
treatment_evaluation[f"r_loss_{treatment_variant}_vs_0"] = r_loss( + rloss_evaluation[f"r_loss_{treatment_variant}_vs_0"] = r_loss( cate_estimates=cate_estimates[mask], outcome_estimates=y_hat[mask], propensity_scores=propensity_estimates[mask], outcomes=y[mask], treatments=w[mask] == treatment_variant, ) - - return propensity_evaluation | outcome_evaluation | treatment_evaluation + return ( + propensity_evaluation + | outcome_evaluation + | rloss_evaluation + | treatment_evaluation + ) def _pseudo_outcome_and_weights( self, @@ -413,15 +451,13 @@ def _pseudo_outcome_and_weights( y: Vector, w: Vector, treatment_variant: int, + is_oos: bool, + oos_method: OosMethod = OVERALL, mask: Vector | None = None, epsilon: float = _EPSILON, ) -> tuple[np.ndarray, np.ndarray]: """Compute the R-Learner pseudo outcome and corresponding weights. - Importantly, this method assumes to be applied on in-sample data. - In other words, ``is_oos`` will always be set to ``False`` when calling - ``predict_nuisance``. - If ``mask`` is provided, the retuned pseudo outcomes and weights are only with respect the observations that the mask selects. @@ -437,12 +473,17 @@ def _pseudo_outcome_and_weights( # be able to match original observations with their corresponding folds. y_estimates = self.predict_nuisance( X=X, - is_oos=False, + is_oos=is_oos, model_kind=OUTCOME_MODEL, model_ord=0, + oos_method=oos_method, )[mask] w_estimates = self.predict_nuisance( - X=X, is_oos=False, model_kind=PROPENSITY_MODEL, model_ord=0 + X=X, + is_oos=is_oos, + model_kind=PROPENSITY_MODEL, + model_ord=0, + oos_method=oos_method, )[mask] w_estimates_binarized = w_estimates[:, treatment_variant] / ( w_estimates[:, 0] + w_estimates[:, treatment_variant] diff --git a/tests/test_learner.py b/tests/test_learner.py index c72c961..f001eda 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -372,6 +372,10 @@ def test_learner_evaluate( assert ( {f"r_loss_{i}_vs_0" for i in range(1, n_variants)} | {"propensity_model_neg_log_loss"} + | { + f"treatment_model_{i}_vs_0_neg_root_mean_squared_error" + for i in range(1, n_variants) + } ) <= set(evaluation.keys()) elif metalearner == "X": assert "propensity_model_neg_log_loss" in evaluation @@ -446,11 +450,14 @@ def new_score_2(y, y_pred): { "outcome_model": [make_scorer(new_score_2)], "propensity_model": [], + "treatment_model": ["neg_mean_absolute_error"], }, { "outcome_model_custom_scorer_0", "r_loss_1_vs_0", "r_loss_2_vs_0", + "treatment_model_1_vs_0_neg_mean_absolute_error", + "treatment_model_2_vs_0_neg_mean_absolute_error", }, ), (