From ae1130573a31a6844b5e8b9e4fcd80347ae37cb0 Mon Sep 17 00:00:00 2001
From: kklein
Date: Mon, 29 Jul 2024 10:12:08 +0200
Subject: [PATCH] Move fixed propensity model to utils.

---
 docs/examples/example_propensity.ipynb | 66 ++++++++++----------------
 metalearners/utils.py                  | 27 +++++++++++
 2 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/docs/examples/example_propensity.ipynb b/docs/examples/example_propensity.ipynb
index 3a35635..ac0c10a 100644
--- a/docs/examples/example_propensity.ipynb
+++ b/docs/examples/example_propensity.ipynb
@@ -59,8 +59,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Creating our own estimator\n",
-    "--------------------------\n",
+    "Using a dummy estimator\n",
+    "-----------------------\n",
     "\n",
     "In this tutorial we will assume that we know that all observations were assigned to the\n",
     "treatment with a fixed probability of 0.3, which is close to the fraction of the observations\n",
@@ -89,43 +89,10 @@
     "dataset, we just use it for illustrative purposes.\n",
     "```\n",
     "\n",
-    "Now we can define our custom ``sklearn``-like classifier. We recommend inheriting from\n",
-    "the ``sklearn`` base classes and following the rules explained in the\n",
-    "[sklearn documentation](https://scikit-learn.org/stable/developers/develop.html) to avoid\n",
-    "having to define helper functions and ensure the correct functionality of the ``metalearners``\n",
-    "library."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "vscode": {
-     "languageId": "plaintext"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from sklearn.base import BaseEstimator, ClassifierMixin\n",
-    "from typing import Any\n",
-    "from typing_extensions import Self\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "class FixedPropensityModel(ClassifierMixin, BaseEstimator):\n",
-    "    def __init__(self, propensity_score: float) -> None:\n",
-    "        self.propensity_score = propensity_score\n",
-    "\n",
-    "    def fit(self, X: pd.DataFrame, y: pd.Series) -> Self:\n",
-    "        self.classes_ = np.unique(y.to_numpy())  # sklearn requires this\n",
-    "        return self\n",
-    "\n",
-    "    def predict(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:\n",
-    "        return np.argmax(self.predict_proba(X), axis=1)\n",
-    "\n",
-    "    def predict_proba(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:\n",
-    "        return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])"
+    "Now we can use a custom ``sklearn``-like classifier: :class:`~metalearners.utils.FixedBinaryPropensity`.\n",
+    "It can be used like any other ``sklearn`` classifier but will always return the same propensity,\n",
+    "independently of the observed covariates. This propensity has to be provided at initialization via the\n",
+    "``propensity_score`` parameter."
   ]
  },
  {
@@ -149,11 +116,12 @@
    "outputs": [],
    "source": [
     "from metalearners import RLearner\n",
+    "from metalearners.utils import FixedBinaryPropensity\n",
     "from lightgbm import LGBMRegressor\n",
     "\n",
     "rlearner = RLearner(\n",
     "    nuisance_model_factory=LGBMRegressor,\n",
-    "    propensity_model_factory=FixedPropensityModel,\n",
+    "    propensity_model_factory=FixedBinaryPropensity,\n",
     "    treatment_model_factory=LGBMRegressor,\n",
     "    nuisance_model_params={\"verbose\": -1},\n",
     "    propensity_model_params={\"propensity_score\": 0.3},\n",
@@ -205,10 +173,24 @@
   }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/metalearners/utils.py b/metalearners/utils.py
index 765e6c1..39f052f 100644
--- a/metalearners/utils.py
+++ b/metalearners/utils.py
@@ -1,7 +1,12 @@
 # Copyright (c) QuantCo 2024-2024
 # SPDX-License-Identifier: BSD-3-Clause
 
+from typing import Any
+
 import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator, ClassifierMixin
+from typing_extensions import Self
 
 from metalearners.drlearner import DRLearner
 from metalearners.metalearner import MetaLearner
@@ -73,3 +78,25 @@ def simplify_output(tensor: np.ndarray) -> np.ndarray:
     if n_outputs == 2:
         return tensor[:, :, 1].reshape(n_obs, n_variants)
     return tensor
+
+
+class FixedBinaryPropensity(ClassifierMixin, BaseEstimator):
+    """Dummy binary classifier which predicts a fixed propensity score,
+    independently of the covariates."""
+
+    def __init__(self, propensity_score: float) -> None:
+        self.propensity_score = propensity_score
+
+    def fit(self, X: pd.DataFrame, y: pd.Series) -> Self:
+        self.classes_ = np.unique(y.to_numpy())  # sklearn requires this
+        if (n_classes := len(self.classes_)) > 2:
+            raise ValueError(
+                f"FixedBinaryPropensity only supports binary outcomes but {n_classes} classes were provided."
+            )
+        return self
+
+    def predict(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:
+        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]  # map column index to class label
+
+    def predict_proba(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:
+        return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])
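
As a sanity check, a minimal standalone sketch of how the new
FixedBinaryPropensity behaves once this patch is applied; the toy covariates
and outcomes below are invented purely for illustration.

    import numpy as np
    import pandas as pd

    from metalearners.utils import FixedBinaryPropensity

    X = pd.DataFrame({"x0": [0.1, -1.2, 0.5, 2.3]})
    y = pd.Series([0, 1, 0, 1])

    model = FixedBinaryPropensity(propensity_score=0.3)
    model.fit(X, y)

    # Every row receives the same class probabilities, independently of X:
    # [[0.7 0.3]
    #  [0.7 0.3]
    #  [0.7 0.3]
    #  [0.7 0.3]]
    print(model.predict_proba(X))

    # predict() returns the more probable class per row, here always 0.
    print(model.predict(X))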