From ae1130573a31a6844b5e8b9e4fcd80347ae37cb0 Mon Sep 17 00:00:00 2001
From: kklein
Date: Mon, 29 Jul 2024 10:12:08 +0200
Subject: [PATCH] Move fixed propensity model to utils.

---
 docs/examples/example_propensity.ipynb | 66 ++++++++++----------------
 metalearners/utils.py                  | 27 +++++++++++
 2 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/docs/examples/example_propensity.ipynb b/docs/examples/example_propensity.ipynb
index 3a35635..ac0c10a 100644
--- a/docs/examples/example_propensity.ipynb
+++ b/docs/examples/example_propensity.ipynb
@@ -59,8 +59,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Creating our own estimator\n",
-    "--------------------------\n",
+    "Using a dummy estimator\n",
+    "-----------------------\n",
     "\n",
     "In this tutorial we will assume that we know that all observations were assigned to the\n",
     "treatment with a fixed probability of 0.3, which is close to the fraction of the observations\n",
@@ -89,43 +89,10 @@
     "dataset, we just use it for illustrative purposes.\n",
     "```\n",
     "\n",
-    "Now we can define our custom ``sklearn``-like classifier. We recommend inheriting from\n",
-    "the ``sklearn`` base classes and following the rules explained in the\n",
-    "[sklearn documentation](https://scikit-learn.org/stable/developers/develop.html) to avoid\n",
-    "having to define helper functions and ensure the correct functionality of the ``metalearners``\n",
-    "library."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "vscode": {
-     "languageId": "plaintext"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from sklearn.base import BaseEstimator, ClassifierMixin\n",
-    "from typing import Any\n",
-    "from typing_extensions import Self\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "class FixedPropensityModel(ClassifierMixin, BaseEstimator):\n",
-    "    def __init__(self, propensity_score: float) -> None:\n",
-    "        self.propensity_score = propensity_score\n",
-    "\n",
-    "    def fit(self, X: pd.DataFrame, y: pd.Series) -> Self:\n",
-    "        self.classes_ = np.unique(y.to_numpy())  # sklearn requires this\n",
-    "        return self\n",
-    "\n",
-    "    def predict(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:\n",
-    "        return np.argmax(self.predict_proba(X), axis=1)\n",
-    "\n",
-    "    def predict_proba(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:\n",
-    "        return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])"
+    "Now we can use a custom ``sklearn``-like classifier: :class:`~metalearners.utils.FixedBinaryPropensity`.\n",
+    "It can be used like any other ``sklearn`` classifier but will always return the same propensity,\n",
+    "independently of the observed covariates. This propensity has to be provided at initialization via the\n",
+    "``propensity_score`` parameter."
   ]
  },
  {
@@ -149,11 +116,12 @@
    "outputs": [],
    "source": [
     "from metalearners import RLearner\n",
+    "from metalearners.utils import FixedBinaryPropensity\n",
     "from lightgbm import LGBMRegressor\n",
     "\n",
     "rlearner = RLearner(\n",
     "    nuisance_model_factory=LGBMRegressor,\n",
-    "    propensity_model_factory=FixedPropensityModel,\n",
+    "    propensity_model_factory=FixedBinaryPropensity,\n",
     "    treatment_model_factory=LGBMRegressor,\n",
     "    nuisance_model_params={\"verbose\": -1},\n",
     "    propensity_model_params={\"propensity_score\": 0.3},\n",
@@ -205,10 +173,24 @@
   }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/metalearners/utils.py b/metalearners/utils.py
index 765e6c1..39f052f 100644
--- a/metalearners/utils.py
+++ b/metalearners/utils.py
@@ -1,7 +1,12 @@
 # Copyright (c) QuantCo 2024-2024
 # SPDX-License-Identifier: BSD-3-Clause
 
+from typing import Any
+
 import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator, ClassifierMixin
+from typing_extensions import Self
 
 from metalearners.drlearner import DRLearner
 from metalearners.metalearner import MetaLearner
@@ -73,3 +78,25 @@ def simplify_output(tensor: np.ndarray) -> np.ndarray:
     if n_outputs == 2:
         return tensor[:, :, 1].reshape(n_obs, n_variants)
     return tensor
+
+
+class FixedBinaryPropensity(ClassifierMixin, BaseEstimator):
+    """Dummy binary classifier which predicts a fixed propensity score,
+    independently of the covariates."""
+
+    def __init__(self, propensity_score: float) -> None:
+        self.propensity_score = propensity_score
+
+    def fit(self, X: pd.DataFrame, y: pd.Series) -> Self:
+        self.classes_ = np.unique(y.to_numpy())  # sklearn requires this
+        if (n_classes := len(self.classes_)) > 2:
+            raise ValueError(
+                f"FixedBinaryPropensity only supports binary outcomes but {n_classes} classes were provided."
+            )
+        return self
+
+    def predict(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:
+        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]  # map column index to class label
+
+    def predict_proba(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:
+        return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])
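
As a sanity check, a minimal standalone sketch of how the new
FixedBinaryPropensity behaves once this patch is applied; the toy covariates
and outcomes below are invented purely for illustration.

    import numpy as np
    import pandas as pd

    from metalearners.utils import FixedBinaryPropensity

    X = pd.DataFrame({"x0": [0.1, -1.2, 0.5, 2.3]})
    y = pd.Series([0, 1, 0, 1])

    model = FixedBinaryPropensity(propensity_score=0.3)
    model.fit(X, y)

    # Every row receives the same class probabilities, independently of X:
    # [[0.7 0.3]
    #  [0.7 0.3]
    #  [0.7 0.3]
    #  [0.7 0.3]]
    print(model.predict_proba(X))

    # predict() returns the more probable class per row, here always 0.
    print(model.predict(X))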