From 5ddf5533aebaedbceee53d53ef8fd9ca8b7d70ee Mon Sep 17 00:00:00 2001
From: Apoorva Lal
Date: Tue, 6 Aug 2024 10:33:07 -0700
Subject: [PATCH] Revert "Move fixed propensity model to utils. (#72)"

This reverts commit 3b2777c101eb382593164e2db8401c3acca75fd8.
---
 CHANGELOG.rst                          |  6 +--
 docs/examples/example_propensity.ipynb | 66 ++++++++++++++++----------
 metalearners/utils.py                  | 32 -------------
 tests/test_utils.py                    | 61 +-----------------------
 4 files changed, 45 insertions(+), 120 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a89a0839..c5b1a50e 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -12,9 +12,7 @@ Changelog

 **New features**

-* Add :meth:`metalearners.metalearner.MetaLearner.init_params`.
-
-* Add :class:`metalearners.utils.FixedBinaryPropensity`.
+* Added :meth:`metalearners.metalearner.MetaLearner.init_params`.

 * Added :meth:`metalearners.metalearner.DRLearner.treatment_effect` to compute AIPW point estimate and standard error
   for _average treatment effects (ATE)_ without requiring a full model fit (which is required for CATE estimation). A new notebook contains examples.
@@ -23,7 +21,7 @@ Changelog

 **New features**

-* Added :meth:`metalearners.metalearner.MetaLearner.fit_all_nuisance` and
+* Added :meth:`metalearners.metalearner.MetaLearner.fit_all_nuisance` and
   :meth:`metalearners.metalearner.MetaLearner.fit_all_treatment`.

 * Add optional ``store_raw_results`` and ``store_results`` parameters to :class:`metalearners.grid_search.MetaLearnerGridSearch`.
diff --git a/docs/examples/example_propensity.ipynb b/docs/examples/example_propensity.ipynb
index 7fc280f8..3a356351 100644
--- a/docs/examples/example_propensity.ipynb
+++ b/docs/examples/example_propensity.ipynb
@@ -59,8 +59,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Using a dummy estimator\n",
-    "-----------------------\n",
+    "Creating our own estimator\n",
+    "--------------------------\n",
     "\n",
     "In this tutorial we will assume that we know that all observations were assigned to the\n",
     "treatment with a fixed probability of 0.3, which is close to the fraction of the observations\n",
@@ -89,10 +89,43 @@
     "dataset, we just use it for illustrational purposes.\n",
     "```\n",
     "\n",
-    "Now we can use a custom ``sklearn``-like classifier: {class}`~metalearners.utils.FixedBinaryPropensity`.\n",
-    "The latter can be used like any ``sklearn`` classifier but will always return the same propensity,\n",
-    "independently of the observed covariates. This propensity has to be provided at initialization via the\n",
-    "``propensity_score`` parameter."
+    "Now we can define our custom ``sklearn``-like classifier. We recommend inheriting from\n",
+    "the ``sklearn`` base classes and following the rules explained in the\n",
+    "[sklearn documentation](https://scikit-learn.org/stable/developers/develop.html); this\n",
+    "avoids having to define helper functions and ensures that the classifier works correctly\n",
+    "with the ``metalearners`` library."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.base import BaseEstimator, ClassifierMixin\n",
+    "from typing import Any\n",
+    "from typing_extensions import Self\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "class FixedPropensityModel(ClassifierMixin, BaseEstimator):\n",
+    "    def __init__(self, propensity_score: float) -> None:\n",
+    "        self.propensity_score = propensity_score\n",
+    "\n",
+    "    def fit(self, X: pd.DataFrame, y: pd.Series) -> Self:\n",
+    "        self.classes_ = np.unique(y.to_numpy())  # sklearn requires this\n",
+    "        return self\n",
+    "\n",
+    "    def predict(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:\n",
+    "        return np.argmax(self.predict_proba(X), axis=1)\n",
+    "\n",
+    "    def predict_proba(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:\n",
+    "        return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])"
    ]
   },
   {
@@ -116,12 +149,11 @@
    "outputs": [],
    "source": [
     "from metalearners import RLearner\n",
-    "from metalearners.utils import FixedBinaryPropensity\n",
     "from lightgbm import LGBMRegressor\n",
     "\n",
     "rlearner = RLearner(\n",
     "    nuisance_model_factory=LGBMRegressor,\n",
-    "    propensity_model_factory=FixedBinaryPropensity,\n",
+    "    propensity_model_factory=FixedPropensityModel,\n",
     "    treatment_model_factory=LGBMRegressor,\n",
     "    nuisance_model_params={\"verbose\": -1},\n",
     "    propensity_model_params={\"propensity_score\": 0.3},\n",
@@ -173,24 +205,10 @@
    }
   ],
  "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
   "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.3"
+   "name": "python"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 2
 }
diff --git a/metalearners/utils.py b/metalearners/utils.py
index 587bef67..765e6c1d 100644
--- a/metalearners/utils.py
+++ b/metalearners/utils.py
@@ -1,14 +1,8 @@
 # Copyright (c) QuantCo 2024-2024
 # SPDX-License-Identifier: BSD-3-Clause

-from typing import Any
-
 import numpy as np
-import pandas as pd
-from sklearn.base import BaseEstimator, ClassifierMixin
-from typing_extensions import Self

-from metalearners._typing import Matrix, Vector
 from metalearners.drlearner import DRLearner
 from metalearners.metalearner import MetaLearner
 from metalearners.rlearner import RLearner
@@ -79,29 +73,3 @@ def simplify_output(tensor: np.ndarray) -> np.ndarray:
     if n_outputs == 2:
         return tensor[:, :, 1].reshape(n_obs, n_variants)
     return tensor
-
-
-class FixedBinaryPropensity(ClassifierMixin, BaseEstimator):
-    """Binary classifier propensity dummy model which outputs a fixed propensity,
-    independently of covariates."""
-
-    def __init__(self, propensity_score: float) -> None:
-        if not 0 <= propensity_score <= 1:
-            raise ValueError(
-                f"Expected a propensity score between 0 and 1 but got {propensity_score}."
-            )
-        self.propensity_score = propensity_score
-
-    def fit(self, X: Matrix, y: Vector) -> Self:
-        self.classes_ = np.unique(y)  # sklearn requires this
-        if (n_classes := len(self.classes_)) > 2:
-            raise ValueError(
-                f"FixedBinaryPropensityModel only supports binary outcomes but {n_classes} were provided ."
-            )
-        return self
-
-    def predict(self, X: Matrix) -> np.ndarray[Any, Any]:
-        return np.argmax(self.predict_proba(X), axis=1)
-
-    def predict_proba(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:
-        return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d88b5d14..c3f334ec 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -2,16 +2,11 @@
 # SPDX-License-Identifier: BSD-3-Clause

 import numpy as np
-import pandas as pd
 import pytest
 from lightgbm import LGBMRegressor

 from metalearners.metalearner import MetaLearner
-from metalearners.utils import (
-    FixedBinaryPropensity,
-    metalearner_factory,
-    simplify_output,
-)
+from metalearners.utils import metalearner_factory, simplify_output


 @pytest.mark.parametrize("prefix", ["T"])
@@ -57,57 +52,3 @@ def test_simplify_output(input, expected):
 def test_simplify_output_raises(input):
     with pytest.raises(ValueError, match="needs to be 3-dimensional"):
         simplify_output(input)
-
-
-@pytest.mark.parametrize("use_pd", [True, False])
-def test_fixed_binary_propensity(use_pd):
-    propensity_score = 0.3
-    dominant_class = propensity_score >= 0.5
-
-    model = FixedBinaryPropensity(propensity_score=propensity_score)
-
-    n_samples = 5
-    X_train = np.ones((n_samples, 5))
-    y_train = np.ones(n_samples)
-    if use_pd:
-        X_train = pd.DataFrame(X_train)
-        y_train = pd.Series(y_train)
-
-    model.fit(X_train, y_train)
-
-    n_test_samples = 3
-    X_test = np.zeros(n_test_samples)
-
-    class_predictions = model.predict(X_test)
-    assert np.array_equal(
-        class_predictions, np.array(np.ones(n_test_samples) * dominant_class)
-    )
-
-    probability_estimates = model.predict_proba(X_test)
-    assert np.array_equal(
-        probability_estimates,
-        np.column_stack(
-            (
-                np.ones(n_test_samples) * (1 - propensity_score),
-                np.ones(n_test_samples) * propensity_score,
-            )
-        ),
-    )
-
-
-@pytest.mark.parametrize("propensity_score", [-1, 100, 1.1])
-def test_fixed_binary_propensity_not_a_propbability(propensity_score):
-    with pytest.raises(ValueError, match="between 0 and 1 but got"):
-        FixedBinaryPropensity(propensity_score=propensity_score)
-
-
-def test_fixed_binary_propensity_non_binary():
-    propensity_score = 0.3
-
-    model = FixedBinaryPropensity(propensity_score=propensity_score)
-
-    n_samples = 5
-    X_train = np.ones((n_samples, 5))
-    y_train = np.fromiter(range(n_samples), dtype=int)
-    with pytest.raises(ValueError, match="only supports binary outcomes"):
-        model.fit(X_train, y_train)
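
Below is a minimal, self-contained sketch of the workflow this revert restores to the notebook. The FixedPropensityModel class and the RLearner wiring are copied from the diff above; the synthetic data and the is_classification, n_variants, fit(X, y, w) and predict(X, is_oos=...) details are assumptions about notebook code not shown in this patch and should be checked against the metalearners documentation.

# Sketch only, not part of the patch: FixedPropensityModel is copied from the
# notebook cell added back by this revert; lines marked "assumed" are not in
# the diff.
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.base import BaseEstimator, ClassifierMixin
from typing_extensions import Self

from metalearners import RLearner


class FixedPropensityModel(ClassifierMixin, BaseEstimator):
    """sklearn-like classifier returning the same propensity for every row."""

    def __init__(self, propensity_score: float) -> None:
        self.propensity_score = propensity_score

    def fit(self, X: pd.DataFrame, y: pd.Series) -> Self:
        self.classes_ = np.unique(y.to_numpy())  # sklearn requires this
        return self

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        return np.argmax(self.predict_proba(X), axis=1)

    def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
        # [P(w=0), P(w=1)] for every row, independent of the covariates X.
        return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])


# Assumed synthetic data: treatment assigned with the fixed probability 0.3
# that the notebook postulates, outcome with a constant treatment effect of 0.5.
rng = np.random.default_rng(42)
n = 1_000
X = pd.DataFrame(rng.normal(size=(n, 3)), columns=["x0", "x1", "x2"])
w = pd.Series(rng.binomial(1, 0.3, size=n))
y = X["x0"] + 0.5 * w + rng.normal(size=n)

rlearner = RLearner(
    nuisance_model_factory=LGBMRegressor,
    propensity_model_factory=FixedPropensityModel,
    treatment_model_factory=LGBMRegressor,
    nuisance_model_params={"verbose": -1},
    propensity_model_params={"propensity_score": 0.3},
    is_classification=False,  # assumed: regression outcome, as in the notebook
    n_variants=2,  # assumed: binary treatment
)
rlearner.fit(X, y, w)  # assumed fit signature
cate_estimates = rlearner.predict(X, is_oos=False)  # assumed predict signature

Since predict_proba ignores X, the R-Learner residualizes the treatment against the constant 0.3 propensity rather than an estimated one, which is exactly what the notebook sets out to demonstrate.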