diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02d971c..f9d98ef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.8.1 - name: Run mypy run: | - pixi run jupyter nbconvert --to script docs/examples/*.ipynb + pixi run jupyter nbconvert --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags no-convert --to script docs/examples/*.ipynb for file in docs/examples/*.txt; do mv -- "$file" "${file%.txt}.py"; done pixi run mypy docs/examples/*.py diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index eda2da2..2fcfa4d 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -1,5 +1,9 @@ name: Package -on: [push] +on: + push: + release: + types: + - published concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -26,6 +30,10 @@ jobs: name: Upload to PyPI needs: [build] runs-on: ubuntu-latest + permissions: + id-token: write + contents: write + environment: pypi if: github.event_name == 'release' && github.event.action == 'published' steps: - uses: actions/download-artifact@v4 diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a42a342..d04fad1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,14 +7,30 @@ Changelog ========= -0.6.0 (2024-06-**) +0.7.0 (2024-07-12) +------------------ + +**New features** + +* Add optional ``adaptive_clipping`` parameter to :class:`metalearners.DRLearner`. + +**Other changes** + +* Change the order of the index columns in ``MetaLearnerGridSearch.results_``. + +* Raise a custom error if only one class is present in a classification outcome. + +* Raise a custom error if some treatment variant has seen classification outcomes that have not appeared for some other treatment variant. + + +0.6.0 (2024-07-08) ------------------ **New features** * Implement :class:`metalearners.grid_search.MetaLearnerGridSearch`. -* Add ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and +* Add a ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and implement the abstract method for the :class:`metalearners.XLearner` and :class:`metalearners.DRLearner`. 
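As the changelog entry for ``adaptive_clipping`` is terse, here is a minimal sketch of how the new flag might be used. It mirrors the ``test_adaptive_clipping_smoke`` test added further below in this patch; the synthetic ``X``, ``y``, ``w`` and the seed are purely illustrative assumptions.

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from metalearners import DRLearner

# Illustrative synthetic data: 100 units, 3 covariates, binary treatment.
rng = np.random.default_rng(42)
X = rng.standard_normal((100, 3))
w = rng.integers(0, 2, 100)
y = X[:, 0] + 0.5 * w + rng.standard_normal(100)

drlearner = DRLearner(
    False,  # is_classification
    2,  # n_variants
    LinearRegression,  # nuisance_model_factory
    LinearRegression,  # treatment_model_factory
    LogisticRegression,  # propensity_model_factory
    n_folds=2,
    adaptive_clipping=True,  # new in 0.7.0: DR-Switch-style pseudo outcomes
)
drlearner.fit(X, y, w)
```

The positional arguments follow the ``__init__`` signature added to ``metalearners/drlearner.py`` in this patch.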
diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 0000000..b07ad9c --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,30 @@ +/* Copied from https://github.com/executablebooks/MyST-NB/issues/453 */ +div.cell.tag_scroll-output div.cell_output { + max-height: 24em; + overflow-y: auto; + max-width: 100%; + overflow-x: auto; +} + +div.cell.tag_scroll-output div.cell_output::-webkit-scrollbar { + width: 0.3rem; + height: 0.3rem; +} + +div.cell.tag_scroll-output div.cell_output::-webkit-scrollbar-thumb { + background: #c1c1c1; + border-radius: 0.25rem; +} + +div.cell.tag_scroll-output div.cell_output::-webkit-scrollbar-thumb:hover { + background: #a0a0a0; +} + +@media print { + div.cell.tag_scroll-output div.cell_output { + max-height: unset; + overflow-y: visible; + max-width: unset; + overflow-x: visible; + } +} diff --git a/docs/conf.py b/docs/conf.py index c6cdcf1..bb8effc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -68,6 +68,9 @@ numpydoc_show_class_members = False +html_css_files = ["custom.css"] + + # Copied and adapted from # https://github.com/pandas-dev/pandas/blob/4a14d064187367cacab3ff4652a12a0e45d0711b/doc/source/conf.py#L613-L659 # Required configuration function to use sphinx.ext.linkcode diff --git a/docs/examples/example_gridsearch.ipynb b/docs/examples/example_gridsearch.ipynb new file mode 100644 index 0000000..586c5d0 --- /dev/null +++ b/docs/examples/example_gridsearch.ipynb @@ -0,0 +1,349 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-cell", + "no-convert" + ], + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "%%html\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(example-grid-search)=\n", + "\n", + "# Tuning hyperparameters of a MetaLearner with ``MetaLearnerGridSearch``\n", + "\n", + "Motivation\n", + "----------\n", + "\n", + "We know that model selection and/or hyperparameter optimization (HPO) can\n", + "have massive impacts on the prediction quality in regular Machine\n", + "Learning. It seems that model selection and hyperparameter\n", + "optimization are of substantial importance for CATE estimation with\n", + "MetaLearners, too; see e.g. [Machlanski et al.](https://arxiv.org/abs/2303.01412).\n", + "\n", + "However, model selection and HPO for MetaLearners look quite different from what we're used to from e.g. simple supervised learning problems. Concretely,\n", + "\n", + "* In terms of a MetaLearner's option space, there are several levels\n", + " to optimize for:\n", + "\n", + " 1. The MetaLearner architecture, e.g. R-Learner vs DR-Learner\n", + " 2. The model to choose per base estimator of said MetaLearner architecture, e.g. ``LogisticRegression`` vs ``LGBMClassifier``\n", + " 3. The model hyperparameters per base model\n", + "\n", + "* On a conceptual level, it's not clear how to measure model quality\n", + " for MetaLearners. As a proxy for the underlying quantity of\n", + " interest one might look into base model performance, the R-Loss of\n", + " the CATE estimates or some more elaborate approaches alluded to by\n", + " [Machlanski et al.](https://arxiv.org/abs/2303.01412).\n", + "\n", + "We think that HPO can be divided into two camps:\n", + "\n", + "* Exploration of (hyperparameter, metric evaluation) pairs where the\n", + " pairs do not influence each other (e.g. 
grid search, random search)\n", + "\n", + "* Exploration of (hyperparameter, metric evaluation) pairs where the\n", + " pairs do influence each other (e.g. Bayesian optimization,\n", + " evolutionary algorithms); in other words, there is a feedback loop between\n", + " sample results and subsequent samples\n", + "\n", + "In this example, we will illustrate the former and how one can make use of\n", + "{class}`~metalearners.grid_search.MetaLearnerGridSearch` for it. For the latter, please\n", + "refer to the {ref}`example on model selection with optuna`.\n", + "\n", + "Loading the data\n", + "----------------\n", + "\n", + "Just like in our {ref}`example on estimating CATEs with a MetaLearner\n", + "`, we will first load some experiment data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pathlib import Path\n", + "from git_root import git_root\n", + "\n", + "df = pd.read_csv(git_root(\"data/learning_mindset.zip\"))\n", + "outcome_column = \"achievement_score\"\n", + "treatment_column = \"intervention\"\n", + "feature_columns = [\n", + " column for column in df.columns if column not in [outcome_column, treatment_column]\n", + "]\n", + "categorical_feature_columns = [\n", + " \"ethnicity\",\n", + " \"gender\",\n", + " \"frst_in_family\",\n", + " \"school_urbanicity\",\n", + " \"schoolid\",\n", + "]\n", + "# Note that explicitly setting the dtype of these features to category\n", + "# allows both lightgbm and shap plots to\n", + "# 1. Operate on features which are not of type int, bool or float\n", + "# 2. Correctly interpret int-valued categoricals as categoricals,\n", + "# rather than as ordinals/numericals.\n", + "for categorical_feature_column in categorical_feature_columns:\n", + " df[categorical_feature_column] = df[categorical_feature_column].astype(\"category\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we've loaded the experiment data, we can split it up into\n", + "train and validation data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_validation, y_train, y_validation, w_train, w_validation = train_test_split(\n", + " df[feature_columns], df[outcome_column], df[treatment_column], test_size=0.25\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Performing the grid search\n", + "--------------------------\n", + "\n", + "We can run a grid search by using the {class}`~metalearners.grid_search.MetaLearnerGridSearch`\n", + "class. However, it's important to note that this class only supports a single MetaLearner\n", + "architecture at a time. If you're interested in conducting a grid search across multiple architectures,\n", + "you will need to run several grid searches.\n", + "\n", + "Let's say we want to work with a {class}`~metalearners.DRLearner`. 
We can check the names of\n", + "the base models for this architecture with the following code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from metalearners import DRLearner\n", + "\n", + "print(DRLearner.nuisance_model_specifications().keys())\n", + "print(DRLearner.treatment_model_specifications().keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "We see that this MetaLearner contains three base models: ``\"variant_outcome_model\"``,\n", + "``\"propensity_model\"`` and ``\"treatment_model\"``.\n", + "\n", + "Since our problem has a regression outcome, the ``\"variant_outcome_model\"`` should be a regressor.\n", + "The ``\"propensity_model\"`` and ``\"treatment_model\"`` are always a classifier and a regressor,\n", + "respectively.\n", + "\n", + "To instantiate the {class}`~metalearners.grid_search.MetaLearnerGridSearch` object we need to\n", + "specify the different base models to be used. Moreover, if we'd like to use non-default hyperparameters for a given base model, we need to specify those, too.\n", + "\n", + "In this tutorial we test a ``LinearRegression`` and an ``LGBMRegressor`` for the outcome model,\n", + "an ``LGBMClassifier`` and a ``QuadraticDiscriminantAnalysis`` for the propensity model, and an\n", + "``LGBMRegressor`` for the treatment model.\n", + "\n", + "Finally, we can define the hyperparameters to test for the base models using the ``param_grid``\n", + "parameter." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from metalearners.grid_search import MetaLearnerGridSearch\n", + "from lightgbm import LGBMClassifier, LGBMRegressor\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n", + "\n", + "gs = MetaLearnerGridSearch(\n", + " metalearner_factory=DRLearner,\n", + " metalearner_params={\"is_classification\": False, \"n_variants\": 2},\n", + " base_learner_grid={\n", + " \"variant_outcome_model\": [LinearRegression, LGBMRegressor],\n", + " \"propensity_model\": [LGBMClassifier, QuadraticDiscriminantAnalysis],\n", + " \"treatment_model\": [LGBMRegressor],\n", + " },\n", + " param_grid={\n", + " \"variant_outcome_model\": {\n", + " \"LGBMRegressor\": {\"n_estimators\": [3, 5], \"verbose\": [-1]}\n", + " },\n", + " \"treatment_model\": {\"LGBMRegressor\": {\"n_estimators\": [1, 2], \"verbose\": [-1]}},\n", + " \"propensity_model\": {\n", + " \"LGBMClassifier\": {\"n_estimators\": [1, 2, 3], \"verbose\": [-1]}\n", + " },\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can call {meth}`~metalearners.grid_search.MetaLearnerGridSearch.fit` with the train\n", + "and validation data and then inspect the results ``DataFrame`` in ``results_``." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "scroll-output" + ], + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "gs.fit(X_train, y_train, w_train, X_validation, y_validation, w_validation)\n", + "gs.results_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reusing base models\n", + "--------------------\n", + "To decrease the grid search runtime, it may sometimes be desirable to reuse some nuisance models.\n", + "We refer to our {ref}`example of model reusage ` for a more in-depth explanation\n", + "of how this can be achieved; here we'll show an example of integrating model\n", + "reuse with {class}`~metalearners.grid_search.MetaLearnerGridSearch`.\n", + "\n", + "We will reuse the ``\"variant_outcome_model\"`` of a {class}`~metalearners.TLearner` for\n", + "a grid search over the {class}`~metalearners.XLearner`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "scroll-output" + ], + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from metalearners import TLearner, XLearner\n", + "\n", + "tl = TLearner(\n", + " False,\n", + " 2,\n", + " LGBMRegressor,\n", + " nuisance_model_params={\"verbose\": -1, \"n_estimators\": 20, \"learning_rate\": 0.05},\n", + " n_folds=2,\n", + ")\n", + "tl.fit(X_train, y_train, w_train)\n", + "\n", + "gs = MetaLearnerGridSearch(\n", + " metalearner_factory=XLearner,\n", + " metalearner_params={\n", + " \"is_classification\": False,\n", + " \"n_variants\": 2,\n", + " \"n_folds\": 5, # The number of folds does not need to be the same as in the TLearner\n", + " \"fitted_nuisance_models\": {\n", + " \"variant_outcome_model\": tl._nuisance_models[\"variant_outcome_model\"]\n", + " },\n", + " },\n", + " base_learner_grid={\n", + " \"propensity_model\": [LGBMClassifier],\n", + " \"control_effect_model\": [LGBMRegressor, LinearRegression],\n", + " \"treatment_effect_model\": [LGBMRegressor, LinearRegression],\n", + " },\n", + " param_grid={\n", + " \"propensity_model\": {\"LGBMClassifier\": {\"n_estimators\": [5], \"verbose\": [-1]}},\n", + " \"treatment_effect_model\": {\n", + " \"LGBMRegressor\": {\"n_estimators\": [5, 10], \"verbose\": [-1]}\n", + " },\n", + " \"control_effect_model\": {\n", + " \"LGBMRegressor\": {\"n_estimators\": [1, 3], \"verbose\": [-1]}\n", + " },\n", + " },\n", + ")\n", + "\n", + "gs.fit(X_train, y_train, w_train, X_validation, y_validation, w_validation)\n", + "gs.results_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Further comments\n", + "-------------------\n", + "* We strongly recommend only reusing base models if they have been trained on\n", + " exactly the same data. If this is not the case, some functionality\n", + " will likely not work as expected." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/examples/example_lime.ipynb b/docs/examples/example_lime.ipynb index d3c97ee..e967df8 100644 --- a/docs/examples/example_lime.ipynb +++ b/docs/examples/example_lime.ipynb @@ -217,10 +217,10 @@ "source": [ "### Generating lime plots\n", "\n", - "``lime`` will expect a function which consumes an ``X`` and returns\n", + "``lime`` will expect a function which consumes an ``np.ndarray`` ``X`` and returns\n", "a one-dimensional vector of the same length as ``X``. 
We'll have to\n", "adapt the {meth}`~metalearners.rlearner.RLearner.predict` method of\n", - "our {class}`~metalearners.rlearner.RLearner` in two ways:\n", + "our {class}`~metalearners.rlearner.RLearner` in three ways:\n", "\n", "* We need to pass a value for the necessary parameter ``is_oos`` to {meth}`~metalearners.rlearner.RLearner.predict`.\n", "\n", @@ -228,6 +228,10 @@ " {meth}`~metalearners.rlearner.RLearner.predict` to be one-dimensional. This\n", " we can easily achieve via {func}`metalearners.utils.simplify_output`.\n", "\n", + "* We need to convert the ``np.ndarray`` back to a ``pd.DataFrame`` to work with categoricals,\n", + " specifying the correct categories so that the categorical codes (which LightGBM uses internally) stay consistent;\n", + " see [this issue](https://github.com/microsoft/LightGBM/issues/5162) for more context.\n", + "\n", "This we can do as follows:" ] }, @@ -244,7 +248,11 @@ "from metalearners.utils import simplify_output\n", "\n", "def predict(X):\n", - " return simplify_output(rlearner.predict(X, is_oos=True))" + " X_pd = pd.DataFrame(X, copy=True)\n", + " for c in X_pd.columns:\n", + " # This line sets the cat.categories correctly (even if not all are present in X)\n", + " X_pd[c] = X_pd[c].astype(df[feature_columns].iloc[:, c].dtype)\n", + " return simplify_output(rlearner.predict(X_pd, is_oos=True))" ] }, { @@ -254,26 +262,7 @@ "where we set ``is_oos=True`` since ``lime`` will call\n", "{meth}`~metalearners.rlearner.RLearner.predict`\n", "with various inputs which will not be able to be recognized as\n", - "in-sample data.\n", - "\n", - "Since ``lime`` expects ``numpy`` datastructures, we'll have to\n", - "manually encode the categorical features of our ``pandas`` data\n", - "structure, see [this issue](https://github.com/microsoft/LightGBM/issues/5162) for more context." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "X = df[feature_columns].copy()\n", - "for categorical_feature_column in categorical_feature_columns:\n", - " X[categorical_feature_column] = X[categorical_feature_column].cat.codes" + "in-sample data." 
] }, { @@ -332,10 +321,8 @@ "from lime.lime_tabular import LimeTabularExplainer\n", "from lime.submodular_pick import SubmodularPick\n", "\n", - "X = X.to_numpy()\n", - "\n", "explainer = LimeTabularExplainer(\n", - " X,\n", + " df[feature_columns].to_numpy(),\n", " feature_names=feature_columns,\n", " categorical_features=categorical_feature_indices,\n", " categorical_names=categorical_names,\n", @@ -345,7 +332,7 @@ ")\n", "\n", "sp = SubmodularPick(\n", - " data=X,\n", + " data=df[feature_columns].to_numpy(),\n", " explainer=explainer,\n", " predict_fn=predict,\n", " method=\"sample\",\n", diff --git a/docs/examples/example_optuna.ipynb b/docs/examples/example_optuna.ipynb index 1d87f94..2479cf2 100644 --- a/docs/examples/example_optuna.ipynb +++ b/docs/examples/example_optuna.ipynb @@ -46,6 +46,7 @@ "In this example, we will illustrate the latter camp based on an\n", "application of [optuna](https://github.com/optuna/optuna) -- a\n", "popular framework for HPO -- in interplay with ``metalearners``.\n", + "For the former please refer to the {ref}`example on hyperparameter tuning with MetaLearnerGridSearch`.\n", "\n", "Installation\n", "------------\n", diff --git a/docs/examples/index.rst b/docs/examples/index.rst index 629a177..d825a99 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -10,4 +10,5 @@ Examples Explainability: Lime plots of MetaLearners Explainability: Feature importance and SHAP values Model selection with optuna + Tuning hyperparameters of a MetaLearner with MetaLearnerGridSearch Generating data diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 7c898b5..ca1d4e6 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -8,7 +8,16 @@ from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Scoring, Vector +from metalearners._typing import ( + Features, + Matrix, + ModelFactory, + OosMethod, + Params, + Scoring, + Vector, + _ScikitModel, +) from metalearners._utils import ( check_onnx_installed, check_spox_installed, @@ -22,7 +31,7 @@ validate_valid_treatment_variant_not_control, warning_experimental_feature, ) -from metalearners.cross_fit_estimator import OVERALL +from metalearners.cross_fit_estimator import OVERALL, CrossFitEstimator from metalearners.metalearner import ( NUISANCE, PROPENSITY_MODEL, @@ -57,6 +66,9 @@ class DRLearner(_ConditionalAverageOutcomeMetaLearner): * ``"treatment_model"`` which estimates :math:`\mathbb{E}[Y(k) - Y(0) | X]` + If ``adaptive_clipping`` is set to ``True``, then the pseudo outcomes are computed using + adaptive propensity clipping described in section 4.1, equation *DR-Switch* of + `Mahajan et al. (2024) `_. 
""" @classmethod @@ -89,6 +101,40 @@ def _supports_multi_treatment(cls) -> bool: def _supports_multi_class(cls) -> bool: return False + def __init__( + self, + is_classification: bool, + n_variants: int, + nuisance_model_factory: ModelFactory | None = None, + treatment_model_factory: ModelFactory | None = None, + propensity_model_factory: type[_ScikitModel] | None = None, + nuisance_model_params: Params | dict[str, Params] | None = None, + treatment_model_params: Params | dict[str, Params] | None = None, + propensity_model_params: Params | None = None, + fitted_nuisance_models: dict[str, list[CrossFitEstimator]] | None = None, + fitted_propensity_model: CrossFitEstimator | None = None, + feature_set: Features | dict[str, Features] | None = None, + n_folds: int | dict[str, int] = 10, + random_state: int | None = None, + adaptive_clipping: bool = False, + ): + super().__init__( + nuisance_model_factory=nuisance_model_factory, + is_classification=is_classification, + n_variants=n_variants, + treatment_model_factory=treatment_model_factory, + propensity_model_factory=propensity_model_factory, + nuisance_model_params=nuisance_model_params, + treatment_model_params=treatment_model_params, + propensity_model_params=propensity_model_params, + fitted_nuisance_models=fitted_nuisance_models, + fitted_propensity_model=fitted_propensity_model, + feature_set=feature_set, + n_folds=n_folds, + random_state=random_state, + ) + self.adaptive_clipping = adaptive_clipping + def fit( self, X: Matrix, @@ -100,7 +146,7 @@ def fit( n_jobs_base_learners: int | None = None, ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._treatment_variants_indices = [] @@ -324,6 +370,14 @@ def _pseudo_outcome( - y0_estimate ) + if self.adaptive_clipping: + t_pseudo_outcome = y1_estimate - y0_estimate + pseudo_outcome = np.where( + propensity_estimates.min(axis=1) < epsilon, + t_pseudo_outcome, + pseudo_outcome, + ) + return pseudo_outcome @classmethod diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 1d85f63..cc9c732 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -83,12 +83,12 @@ def _format_results(results: Sequence[_GSResult]) -> pd.DataFrame: for result in results: row: dict[str, str | int | float] = {} row["metalearner"] = result.metalearner.__class__.__name__ - nuisance_models = ( + nuisance_models = sorted( set(result.metalearner.nuisance_model_specifications().keys()) - result.metalearner._prefitted_nuisance_models ) - treatment_models = set( - result.metalearner.treatment_model_specifications().keys() + treatment_models = sorted( + set(result.metalearner.treatment_model_specifications().keys()) ) for model_kind in nuisance_models: row[model_kind] = result.metalearner.nuisance_model_factory[ @@ -115,13 +115,16 @@ def _format_results(results: Sequence[_GSResult]) -> pd.DataFrame: row[f"test_{name}"] = value rows.append(row) df = pd.DataFrame(rows) - index_columns = [ - c - for c in df.columns - if not c.endswith("_time") - and not c.startswith("train_") - and not c.startswith("test_") - ] + sorted_cols = sorted(df.columns) + index_columns = ["metalearner"] + for model_kind in nuisance_models: + for c in sorted_cols: + if c.startswith(model_kind): + index_columns.append(c) + for model_kind in treatment_models: + for c in sorted_cols: + if c.startswith(model_kind): + index_columns.append(c) df = df.set_index(index_columns) return df diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 
e9be774..093106b 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -318,7 +318,7 @@ def _validate_treatment(self, w: Vector) -> None: f"Yet we found the values {set(np.unique(w))}." ) - def _validate_outcome(self, y: Vector) -> None: + def _validate_outcome(self, y: Vector, w: Vector) -> None: if ( self.is_classification and not self._supports_multi_class() @@ -328,6 +328,17 @@ def _validate_outcome(self, y: Vector) -> None: f"{self.__class__.__name__} does not support multiclass classification." f" Yet we found {len(np.unique(y))} classes." ) + if self.is_classification: + classes_0 = set(np.unique(y[w == 0])) + for tv in range(self.n_variants): + if set(np.unique(y[w == tv])) != classes_0: + raise ValueError( + f"Variants 0 and {tv} have seen different sets of classification outcomes. Please check your data." + ) + if len(classes_0) == 1: + raise ValueError( + f"There is only one class present in the classification outcome: {classes_0}. Please check your data." + ) def _validate_models(self) -> None: """Validate that the base models are appropriate. diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index aa5912a..755c559 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -175,7 +175,7 @@ def fit( ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._variants_indices = [] diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 4fc9ad1..5b9818e 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -153,7 +153,7 @@ def fit( n_jobs_base_learners: int | None = None, ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._fitted_treatments = convert_treatment(w) mock_model = self.nuisance_model_factory[_BASE_MODEL]( diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 98c8aed..361a689 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -69,7 +69,7 @@ def fit( n_jobs_base_learners: int | None = None, ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._treatment_variants_indices = [] diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index dab02f2..10b09b2 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -93,7 +93,7 @@ def fit( n_jobs_base_learners: int | None = None, ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._treatment_variants_indices = [] diff --git a/tests/test_drlearner.py b/tests/test_drlearner.py index f71dd50..fe3bf18 100644 --- a/tests/test_drlearner.py +++ b/tests/test_drlearner.py @@ -21,6 +21,20 @@ from .conftest import all_sklearn_regressors +def test_adaptive_clipping_smoke(dummy_dataset): + X, y, w = dummy_dataset + ml = DRLearner( + False, + 2, + LinearRegression, + LinearRegression, + LogisticRegression, + n_folds=2, + adaptive_clipping=True, + ) + ml.fit(X, y, w) + + @pytest.mark.parametrize( "treatment_model_factory, onnx_converter", ( diff --git a/tests/test_grid_search.py b/tests/test_grid_search.py index fd953ff..e29d3d3 100644 --- a/tests/test_grid_search.py +++ b/tests/test_grid_search.py @@ -25,7 +25,7 @@ {"base_model": [LinearRegression, LGBMRegressor]}, {"base_model": {"LGBMRegressor": {"n_estimators": [1, 2]}}}, 3, - 3, + ["metalearner", "base_model", "base_model_n_estimators"], ), ( SLearner, @@ -33,7 +33,7 @@ {"base_model": [LogisticRegression, LGBMClassifier]}, {"base_model": 
{"LGBMClassifier": {"n_estimators": [1, 2]}}}, 3, - 3, + ["metalearner", "base_model", "base_model_n_estimators"], ), ( TLearner, @@ -41,7 +41,11 @@ {"variant_outcome_model": [LinearRegression, LGBMRegressor]}, {"variant_outcome_model": {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}}, 4, - 3, + [ + "metalearner", + "variant_outcome_model", + "variant_outcome_model_n_estimators", + ], ), ( XLearner, @@ -58,7 +62,16 @@ "treatment_effect_model": {"LGBMRegressor": {"n_estimators": [1]}}, }, 6, - 8, + [ + "metalearner", + "propensity_model", + "propensity_model_n_estimators", + "variant_outcome_model", + "control_effect_model", + "control_effect_model_n_estimators", + "treatment_effect_model", + "treatment_effect_model_n_estimators", + ], ), ( RLearner, @@ -75,7 +88,15 @@ }, }, 9, - 7, + [ + "metalearner", + "outcome_model", + "propensity_model", + "propensity_model_n_estimators", + "treatment_model", + "treatment_model_learning_rate", + "treatment_model_n_estimators", + ], ), ( DRLearner, @@ -89,7 +110,13 @@ "propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2, 3, 4]}}, }, 4, - 5, + [ + "metalearner", + "propensity_model", + "propensity_model_n_estimators", + "variant_outcome_model", + "treatment_model", + ], ), ], ) @@ -125,7 +152,7 @@ def test_metalearnergridsearch_smoke( gs.fit(X, y, w, X_test, y_test, w_test) assert gs.results_ is not None assert gs.results_.shape[0] == expected_n_configs - assert len(gs.results_.index.names) == expected_index_cols + assert gs.results_.index.names == expected_index_cols train_scores_cols = set( c[6:] for c in list(gs.results_.columns) if c.startswith("train_") diff --git a/tests/test_learner.py b/tests/test_learner.py index f001eda..4aa8072 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -706,8 +706,8 @@ def test_validate_treatment_error_different_instantiation(metalearner_prefix): ) def test_validate_outcome_multi_class(metalearner_prefix, success): covariates = np.zeros((20, 1)) - w = np.array([0, 1] * 10) - y = np.array([0, 1] * 8 + [2] * 4) + w = np.array([0] * 10 + [1] * 10) + y = np.array([0, 1, 2, 3, 4] * 4) factory = metalearner_factory(metalearner_prefix) learner = factory( diff --git a/tests/test_metalearner.py b/tests/test_metalearner.py index 25165f8..dbf27de 100644 --- a/tests/test_metalearner.py +++ b/tests/test_metalearner.py @@ -1059,3 +1059,58 @@ def test_n_jobs_base_learners(implementation, rng): np.testing.assert_allclose(ml.predict(X, False), ml_2.predict(X, False)) np.testing.assert_allclose(ml.predict(X, True), ml_2.predict(X, True)) + + +@pytest.mark.parametrize( + "implementation", + [TLearner, SLearner, XLearner, RLearner, DRLearner], +) +@pytest.mark.parametrize("use_pandas", [False, True]) +def test_validate_outcome_one_class(implementation, use_pandas, rng): + X = rng.standard_normal((10, 2)) + y = np.zeros(10) + w = rng.integers(0, 2, 10) + if use_pandas: + X = pd.DataFrame(X) + y = pd.Series(y) + w = pd.Series(w) + + ml = implementation( + True, + 2, + LogisticRegression, + LinearRegression, + LogisticRegression, + ) + with pytest.raises( + ValueError, + match="There is only one class present in the classification outcome", + ): + ml.fit(X, y, w) + + +@pytest.mark.parametrize( + "implementation", + [TLearner, SLearner, XLearner, RLearner, DRLearner], +) +@pytest.mark.parametrize("use_pandas", [False, True]) +def test_validate_outcome_different_classes(implementation, use_pandas, rng): + X = rng.standard_normal((4, 2)) + y = np.array([0, 1, 0, 0]) + w = np.array([0, 0, 1, 1]) + if use_pandas: + X = 
pd.DataFrame(X) + y = pd.Series(y) + w = pd.Series(w) + + ml = implementation( + True, + 2, + LogisticRegression, + LinearRegression, + LogisticRegression, + ) + with pytest.raises( + ValueError, match="have seen different sets of classification outcomes." + ): + ml.fit(X, y, w)
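
To illustrate the new outcome validation these tests exercise, here is a minimal sketch (directly mirroring ``test_validate_outcome_different_classes``) of the error a user would now see; the tiny data set is purely illustrative.

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from metalearners import DRLearner

X = np.random.default_rng(0).standard_normal((4, 2))
y = np.array([0, 1, 0, 0])  # variant 0 has seen classes {0, 1}, variant 1 only {0}
w = np.array([0, 0, 1, 1])

ml = DRLearner(
    True,  # is_classification
    2,  # n_variants
    LogisticRegression,
    LinearRegression,
    LogisticRegression,
)
try:
    ml.fit(X, y, w)
except ValueError as e:
    # "Variants 0 and 1 have seen different sets of classification outcomes. ..."
    print(e)
```

An analogous ``ValueError`` ("There is only one class present in the classification outcome") is raised when all observed outcomes belong to a single class, as exercised by ``test_validate_outcome_one_class``.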