diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02d971c..f9d98ef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: uses: prefix-dev/setup-pixi@v0.8.1 - name: Run mypy run: | - pixi run jupyter nbconvert --to script docs/examples/*.ipynb + pixi run jupyter nbconvert --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags no-convert --to script docs/examples/*.ipynb for file in docs/examples/*.txt; do mv -- "$file" "${file%.txt}.py"; done pixi run mypy docs/examples/*.py diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index eda2da2..2fcfa4d 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -1,5 +1,9 @@ name: Package -on: [push] +on: + push: + release: + types: + - published concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -26,6 +30,10 @@ jobs: name: Upload to PyPI needs: [build] runs-on: ubuntu-latest + permissions: + id-token: write + contents: write + environment: pypi if: github.event_name == 'release' && github.event.action == 'published' steps: - uses: actions/download-artifact@v4 diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a42a342..d04fad1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,14 +7,30 @@ Changelog ========= -0.6.0 (2024-06-**) +0.7.0 (2024-07-12) +------------------ + +**New features** + +* Add optional ``adaptive_clipping`` parameter to :class:`metalearners.DRLearner`. + +**Other changes** + +* Change the order of the index columns in ``MetaLearnerGridSearch.results_``. + +* Raise a custom error if only one class is present in a classification outcome. + +* Raise a custom error if some treatment variant has seen classification outcomes that have not appeared for some other treatment variant. + + +0.6.0 (2024-07-08) ------------------ **New features** * Implement :class:`metalearners.grid_search.MetaLearnerGridSearch`. -* Add ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and +* Add a ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and implement the abstract method for the :class:`metalearners.XLearner` and :class:`metalearners.DRLearner`. 
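As the changelog entry for ``adaptive_clipping`` is terse, here is a minimal sketch of how the new flag might be used. It mirrors the ``test_adaptive_clipping_smoke`` test added further below in this patch; the synthetic ``X``, ``y``, ``w`` and the seed are purely illustrative assumptions.

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from metalearners import DRLearner

# Illustrative synthetic data: 100 units, 3 covariates, binary treatment.
rng = np.random.default_rng(42)
X = rng.standard_normal((100, 3))
w = rng.integers(0, 2, 100)
y = X[:, 0] + 0.5 * w + rng.standard_normal(100)

drlearner = DRLearner(
    False,  # is_classification
    2,  # n_variants
    LinearRegression,  # nuisance_model_factory
    LinearRegression,  # treatment_model_factory
    LogisticRegression,  # propensity_model_factory
    n_folds=2,
    adaptive_clipping=True,  # new in 0.7.0: DR-Switch-style pseudo outcomes
)
drlearner.fit(X, y, w)
```

The positional arguments follow the ``__init__`` signature added to ``metalearners/drlearner.py`` in this patch.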
diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 0000000..b07ad9c --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,30 @@ +/* Copied from https://github.com/executablebooks/MyST-NB/issues/453 */ +div.cell.tag_scroll-output div.cell_output { + max-height: 24em; + overflow-y: auto; + max-width: 100%; + overflow-x: auto; +} + +div.cell.tag_scroll-output div.cell_output::-webkit-scrollbar { + width: 0.3rem; + height: 0.3rem; +} + +div.cell.tag_scroll-output div.cell_output::-webkit-scrollbar-thumb { + background: #c1c1c1; + border-radius: 0.25rem; +} + +div.cell.tag_scroll-output div.cell_output::-webkit-scrollbar-thumb:hover { + background: #a0a0a0; +} + +@media print { + div.cell.tag_scroll-output div.cell_output { + max-height: unset; + overflow-y: visible; + max-width: unset; + overflow-x: visible; + } +} diff --git a/docs/conf.py b/docs/conf.py index c6cdcf1..bb8effc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -68,6 +68,9 @@ numpydoc_show_class_members = False +html_css_files = ["custom.css"] + + # Copied and adapted from # https://github.com/pandas-dev/pandas/blob/4a14d064187367cacab3ff4652a12a0e45d0711b/doc/source/conf.py#L613-L659 # Required configuration function to use sphinx.ext.linkcode diff --git a/docs/examples/example_gridsearch.ipynb b/docs/examples/example_gridsearch.ipynb new file mode 100644 index 0000000..586c5d0 --- /dev/null +++ b/docs/examples/example_gridsearch.ipynb @@ -0,0 +1,349 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-cell", + "no-convert" + ], + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "%%html\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(example-grid-search)=\n", + "\n", + "# Tuning hyperparameters of a MetaLearner with ``MetaLearnerGridSearch``\n", + "\n", + "Motivation\n", + "----------\n", + "\n", + "We know that model selection and/or hyperparameter optimization (HPO) can\n", + "have massive impacts on the prediction quality in regular Machine\n", + "Learning. It seems that model selection and hyperparameter\n", + "optimization are of substantial importance for CATE estimation with\n", + "MetaLearners, too; see e.g. [Machlanski et al.](https://arxiv.org/abs/2303.01412).\n", + "\n", + "However, model selection and HPO for MetaLearners look quite different from what we're used to from e.g. simple supervised learning problems. Concretely,\n", + "\n", + "* In terms of a MetaLearner's option space, there are several levels\n", + " to optimize for:\n", + "\n", + " 1. The MetaLearner architecture, e.g. R-Learner vs DR-Learner\n", + " 2. The model to choose per base estimator of said MetaLearner architecture, e.g. ``LogisticRegression`` vs ``LGBMClassifier``\n", + " 3. The model hyperparameters per base model\n", + "\n", + "* On a conceptual level, it's not clear how to measure model quality\n", + " for MetaLearners. As a proxy for the underlying quantity of\n", + " interest one might look into base model performance, the R-Loss of\n", + " the CATE estimates or some more elaborate approaches alluded to by\n", + " [Machlanski et al.](https://arxiv.org/abs/2303.01412).\n", + "\n", + "We think that HPO can be divided into two camps:\n", + "\n", + "* Exploration of (hyperparameter, metric evaluation) pairs where the\n", + " pairs do not influence each other (e.g. 
grid search, random search)\n", + "\n", + "* Exploration of (hyperparameter, metric evaluation) pairs where the\n", + " pairs do influence each other (e.g. Bayesian optimization,\n", + " evolutionary algorithms); in other words, there is a feedback loop between\n", + " sample results and subsequent samples\n", + "\n", + "In this example, we will illustrate the former and how one can make use of\n", + "{class}`~metalearners.grid_search.MetaLearnerGridSearch` for it. For the latter, please\n", + "refer to the {ref}`example on model selection with optuna`.\n", + "\n", + "Loading the data\n", + "----------------\n", + "\n", + "Just like in our {ref}`example on estimating CATEs with a MetaLearner\n", + "`, we will first load some experiment data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pathlib import Path\n", + "from git_root import git_root\n", + "\n", + "df = pd.read_csv(git_root(\"data/learning_mindset.zip\"))\n", + "outcome_column = \"achievement_score\"\n", + "treatment_column = \"intervention\"\n", + "feature_columns = [\n", + " column for column in df.columns if column not in [outcome_column, treatment_column]\n", + "]\n", + "categorical_feature_columns = [\n", + " \"ethnicity\",\n", + " \"gender\",\n", + " \"frst_in_family\",\n", + " \"school_urbanicity\",\n", + " \"schoolid\",\n", + "]\n", + "# Note that explicitly setting the dtype of these features to category\n", + "# allows both lightgbm and shap plots to\n", + "# 1. Operate on features which are not of type int, bool or float\n", + "# 2. Correctly interpret int-valued categoricals as categoricals,\n", + "# rather than as ordinals/numericals.\n", + "for categorical_feature_column in categorical_feature_columns:\n", + " df[categorical_feature_column] = df[categorical_feature_column].astype(\"category\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we've loaded the experiment data, we can split it up into\n", + "train and validation data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_validation, y_train, y_validation, w_train, w_validation = train_test_split(\n", + " df[feature_columns], df[outcome_column], df[treatment_column], test_size=0.25\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Performing the grid search\n", + "--------------------------\n", + "\n", + "We can run a grid search by using the {class}`~metalearners.grid_search.MetaLearnerGridSearch`\n", + "class. However, it's important to note that this class only supports a single MetaLearner\n", + "architecture at a time. If you're interested in conducting a grid search across multiple architectures,\n", + "you will need to run several grid searches.\n", + "\n", + "Let's say we want to work with a {class}`~metalearners.DRLearner`. 
We can check the names of\n", + "the base models for this architecture with the following code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from metalearners import DRLearner\n", + "\n", + "print(DRLearner.nuisance_model_specifications().keys())\n", + "print(DRLearner.treatment_model_specifications().keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "We see that this MetaLearner contains three base models: ``\"variant_outcome_model\"``,\n", + "``\"propensity_model\"`` and ``\"treatment_model\"``.\n", + "\n", + "Since our problem has a regression outcome, the ``\"variant_outcome_model\"`` should be a regressor.\n", + "The ``\"propensity_model\"`` and ``\"treatment_model\"`` are always a classifier and a regressor,\n", + "respectively.\n", + "\n", + "To instantiate the {class}`~metalearners.grid_search.MetaLearnerGridSearch` object we need to\n", + "specify the different base models to be used. Moreover, if we'd like to use non-default hyperparameters for a given base model, we need to specify those, too.\n", + "\n", + "In this tutorial we test a ``LinearRegression`` and an ``LGBMRegressor`` for the outcome model,\n", + "an ``LGBMClassifier`` and a ``QuadraticDiscriminantAnalysis`` for the propensity model, and an\n", + "``LGBMRegressor`` for the treatment model.\n", + "\n", + "Finally, we can define the hyperparameters to test for the base models using the ``param_grid``\n", + "parameter." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from metalearners.grid_search import MetaLearnerGridSearch\n", + "from lightgbm import LGBMClassifier, LGBMRegressor\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n", + "\n", + "gs = MetaLearnerGridSearch(\n", + " metalearner_factory=DRLearner,\n", + " metalearner_params={\"is_classification\": False, \"n_variants\": 2},\n", + " base_learner_grid={\n", + " \"variant_outcome_model\": [LinearRegression, LGBMRegressor],\n", + " \"propensity_model\": [LGBMClassifier, QuadraticDiscriminantAnalysis],\n", + " \"treatment_model\": [LGBMRegressor],\n", + " },\n", + " param_grid={\n", + " \"variant_outcome_model\": {\n", + " \"LGBMRegressor\": {\"n_estimators\": [3, 5], \"verbose\": [-1]}\n", + " },\n", + " \"treatment_model\": {\"LGBMRegressor\": {\"n_estimators\": [1, 2], \"verbose\": [-1]}},\n", + " \"propensity_model\": {\n", + " \"LGBMClassifier\": {\"n_estimators\": [1, 2, 3], \"verbose\": [-1]}\n", + " },\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can call {meth}`~metalearners.grid_search.MetaLearnerGridSearch.fit` with the train\n", + "and validation data and then inspect the results ``DataFrame`` in ``results_``." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "scroll-output" + ], + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "gs.fit(X_train, y_train, w_train, X_validation, y_validation, w_validation)\n", + "gs.results_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reusing base models\n", + "--------------------\n", + "To decrease the grid search runtime, it may sometimes be desirable to reuse some nuisance models.\n", + "We refer to our {ref}`example of model reusage ` for a more in-depth explanation\n", + "of how this can be achieved; here we'll show an example of integrating model\n", + "reuse with {class}`~metalearners.grid_search.MetaLearnerGridSearch`.\n", + "\n", + "We will reuse the ``\"variant_outcome_model\"`` of a {class}`~metalearners.TLearner` for\n", + "a grid search over the {class}`~metalearners.XLearner`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "scroll-output" + ], + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from metalearners import TLearner, XLearner\n", + "\n", + "tl = TLearner(\n", + " False,\n", + " 2,\n", + " LGBMRegressor,\n", + " nuisance_model_params={\"verbose\": -1, \"n_estimators\": 20, \"learning_rate\": 0.05},\n", + " n_folds=2,\n", + ")\n", + "tl.fit(X_train, y_train, w_train)\n", + "\n", + "gs = MetaLearnerGridSearch(\n", + " metalearner_factory=XLearner,\n", + " metalearner_params={\n", + " \"is_classification\": False,\n", + " \"n_variants\": 2,\n", + " \"n_folds\": 5, # The number of folds does not need to be the same as in the TLearner\n", + " \"fitted_nuisance_models\": {\n", + " \"variant_outcome_model\": tl._nuisance_models[\"variant_outcome_model\"]\n", + " },\n", + " },\n", + " base_learner_grid={\n", + " \"propensity_model\": [LGBMClassifier],\n", + " \"control_effect_model\": [LGBMRegressor, LinearRegression],\n", + " \"treatment_effect_model\": [LGBMRegressor, LinearRegression],\n", + " },\n", + " param_grid={\n", + " \"propensity_model\": {\"LGBMClassifier\": {\"n_estimators\": [5], \"verbose\": [-1]}},\n", + " \"treatment_effect_model\": {\n", + " \"LGBMRegressor\": {\"n_estimators\": [5, 10], \"verbose\": [-1]}\n", + " },\n", + " \"control_effect_model\": {\n", + " \"LGBMRegressor\": {\"n_estimators\": [1, 3], \"verbose\": [-1]}\n", + " },\n", + " },\n", + ")\n", + "\n", + "gs.fit(X_train, y_train, w_train, X_validation, y_validation, w_validation)\n", + "gs.results_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Further comments\n", + "-------------------\n", + "* We strongly recommend only reusing base models if they have been trained on\n", + " exactly the same data. If this is not the case, some functionality\n", + " will likely not work as expected." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/examples/example_lime.ipynb b/docs/examples/example_lime.ipynb index d3c97ee..e967df8 100644 --- a/docs/examples/example_lime.ipynb +++ b/docs/examples/example_lime.ipynb @@ -217,10 +217,10 @@ "source": [ "### Generating lime plots\n", "\n", - "``lime`` will expect a function which consumes an ``X`` and returns\n", + "``lime`` will expect a function which consumes an ``np.ndarray`` ``X`` and returns\n", "a one-dimensional vector of the same length as ``X``. 
We'll have to\n", "adapt the {meth}`~metalearners.rlearner.RLearner.predict` method of\n", - "our {class}`~metalearners.rlearner.RLearner` in two ways:\n", + "our {class}`~metalearners.rlearner.RLearner` in three ways:\n", "\n", "* We need to pass a value for the necessary parameter ``is_oos`` to {meth}`~metalearners.rlearner.RLearner.predict`.\n", "\n", @@ -228,6 +228,10 @@ " {meth}`~metalearners.rlearner.RLearner.predict` to be one-dimensional. This\n", " we can easily achieve via {func}`metalearners.utils.simplify_output`.\n", "\n", + "* We need to convert the ``np.ndarray`` back to a ``pd.DataFrame`` to work with categoricals,\n", + " specifying the correct categories so that the categorical codes (which LightGBM uses internally) stay consistent;\n", + " see [this issue](https://github.com/microsoft/LightGBM/issues/5162) for more context.\n", + "\n", "This we can do as follows:" ] }, @@ -244,7 +248,11 @@ "from metalearners.utils import simplify_output\n", "\n", "def predict(X):\n", - " return simplify_output(rlearner.predict(X, is_oos=True))" + " X_pd = pd.DataFrame(X, copy=True)\n", + " for c in X_pd.columns:\n", + " # This line sets the cat.categories correctly (even if not all are present in X)\n", + " X_pd[c] = X_pd[c].astype(df[feature_columns].iloc[:, c].dtype)\n", + " return simplify_output(rlearner.predict(X_pd, is_oos=True))" ] }, { @@ -254,26 +262,7 @@ "where we set ``is_oos=True`` since ``lime`` will call\n", "{meth}`~metalearners.rlearner.RLearner.predict`\n", "with various inputs which will not be able to be recognized as\n", - "in-sample data.\n", - "\n", - "Since ``lime`` expects ``numpy`` datastructures, we'll have to\n", - "manually encode the categorical features of our ``pandas`` data\n", - "structure, see [this issue](https://github.com/microsoft/LightGBM/issues/5162) for more context." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "X = df[feature_columns].copy()\n", - "for categorical_feature_column in categorical_feature_columns:\n", - " X[categorical_feature_column] = X[categorical_feature_column].cat.codes" + "in-sample data." 
] }, { @@ -332,10 +321,8 @@ "from lime.lime_tabular import LimeTabularExplainer\n", "from lime.submodular_pick import SubmodularPick\n", "\n", - "X = X.to_numpy()\n", - "\n", "explainer = LimeTabularExplainer(\n", - " X,\n", + " df[feature_columns].to_numpy(),\n", " feature_names=feature_columns,\n", " categorical_features=categorical_feature_indices,\n", " categorical_names=categorical_names,\n", @@ -345,7 +332,7 @@ ")\n", "\n", "sp = SubmodularPick(\n", - " data=X,\n", + " data=df[feature_columns].to_numpy(),\n", " explainer=explainer,\n", " predict_fn=predict,\n", " method=\"sample\",\n", diff --git a/docs/examples/example_optuna.ipynb b/docs/examples/example_optuna.ipynb index 1d87f94..2479cf2 100644 --- a/docs/examples/example_optuna.ipynb +++ b/docs/examples/example_optuna.ipynb @@ -46,6 +46,7 @@ "In this example, we will illustrate the latter camp based on an\n", "application of [optuna](https://github.com/optuna/optuna) -- a\n", "popular framework for HPO -- in interplay with ``metalearners``.\n", + "For the former please refer to the {ref}`example on hyperparameter tuning with MetaLearnerGridSearch`.\n", "\n", "Installation\n", "------------\n", diff --git a/docs/examples/index.rst b/docs/examples/index.rst index 629a177..d825a99 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -10,4 +10,5 @@ Examples Explainability: Lime plots of MetaLearners Explainability: Feature importance and SHAP values Model selection with optuna + Tuning hyperparameters of a MetaLearner with MetaLearnerGridSearch Generating data diff --git a/metalearners/drlearner.py b/metalearners/drlearner.py index 7c898b5..ca1d4e6 100644 --- a/metalearners/drlearner.py +++ b/metalearners/drlearner.py @@ -8,7 +8,16 @@ from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Scoring, Vector +from metalearners._typing import ( + Features, + Matrix, + ModelFactory, + OosMethod, + Params, + Scoring, + Vector, + _ScikitModel, +) from metalearners._utils import ( check_onnx_installed, check_spox_installed, @@ -22,7 +31,7 @@ validate_valid_treatment_variant_not_control, warning_experimental_feature, ) -from metalearners.cross_fit_estimator import OVERALL +from metalearners.cross_fit_estimator import OVERALL, CrossFitEstimator from metalearners.metalearner import ( NUISANCE, PROPENSITY_MODEL, @@ -57,6 +66,9 @@ class DRLearner(_ConditionalAverageOutcomeMetaLearner): * ``"treatment_model"`` which estimates :math:`\mathbb{E}[Y(k) - Y(0) | X]` + If ``adaptive_clipping`` is set to ``True``, then the pseudo outcomes are computed using + adaptive propensity clipping described in section 4.1, equation *DR-Switch* of + `Mahajan et al. (2024) `_. 
""" @classmethod @@ -89,6 +101,40 @@ def _supports_multi_treatment(cls) -> bool: def _supports_multi_class(cls) -> bool: return False + def __init__( + self, + is_classification: bool, + n_variants: int, + nuisance_model_factory: ModelFactory | None = None, + treatment_model_factory: ModelFactory | None = None, + propensity_model_factory: type[_ScikitModel] | None = None, + nuisance_model_params: Params | dict[str, Params] | None = None, + treatment_model_params: Params | dict[str, Params] | None = None, + propensity_model_params: Params | None = None, + fitted_nuisance_models: dict[str, list[CrossFitEstimator]] | None = None, + fitted_propensity_model: CrossFitEstimator | None = None, + feature_set: Features | dict[str, Features] | None = None, + n_folds: int | dict[str, int] = 10, + random_state: int | None = None, + adaptive_clipping: bool = False, + ): + super().__init__( + nuisance_model_factory=nuisance_model_factory, + is_classification=is_classification, + n_variants=n_variants, + treatment_model_factory=treatment_model_factory, + propensity_model_factory=propensity_model_factory, + nuisance_model_params=nuisance_model_params, + treatment_model_params=treatment_model_params, + propensity_model_params=propensity_model_params, + fitted_nuisance_models=fitted_nuisance_models, + fitted_propensity_model=fitted_propensity_model, + feature_set=feature_set, + n_folds=n_folds, + random_state=random_state, + ) + self.adaptive_clipping = adaptive_clipping + def fit( self, X: Matrix, @@ -100,7 +146,7 @@ def fit( n_jobs_base_learners: int | None = None, ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._treatment_variants_indices = [] @@ -324,6 +370,14 @@ def _pseudo_outcome( - y0_estimate ) + if self.adaptive_clipping: + t_pseudo_outcome = y1_estimate - y0_estimate + pseudo_outcome = np.where( + propensity_estimates.min(axis=1) < epsilon, + t_pseudo_outcome, + pseudo_outcome, + ) + return pseudo_outcome @classmethod diff --git a/metalearners/grid_search.py b/metalearners/grid_search.py index 1d85f63..cc9c732 100644 --- a/metalearners/grid_search.py +++ b/metalearners/grid_search.py @@ -83,12 +83,12 @@ def _format_results(results: Sequence[_GSResult]) -> pd.DataFrame: for result in results: row: dict[str, str | int | float] = {} row["metalearner"] = result.metalearner.__class__.__name__ - nuisance_models = ( + nuisance_models = sorted( set(result.metalearner.nuisance_model_specifications().keys()) - result.metalearner._prefitted_nuisance_models ) - treatment_models = set( - result.metalearner.treatment_model_specifications().keys() + treatment_models = sorted( + set(result.metalearner.treatment_model_specifications().keys()) ) for model_kind in nuisance_models: row[model_kind] = result.metalearner.nuisance_model_factory[ @@ -115,13 +115,16 @@ def _format_results(results: Sequence[_GSResult]) -> pd.DataFrame: row[f"test_{name}"] = value rows.append(row) df = pd.DataFrame(rows) - index_columns = [ - c - for c in df.columns - if not c.endswith("_time") - and not c.startswith("train_") - and not c.startswith("test_") - ] + sorted_cols = sorted(df.columns) + index_columns = ["metalearner"] + for model_kind in nuisance_models: + for c in sorted_cols: + if c.startswith(model_kind): + index_columns.append(c) + for model_kind in treatment_models: + for c in sorted_cols: + if c.startswith(model_kind): + index_columns.append(c) df = df.set_index(index_columns) return df diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index 
e9be774..093106b 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -318,7 +318,7 @@ def _validate_treatment(self, w: Vector) -> None: f"Yet we found the values {set(np.unique(w))}." ) - def _validate_outcome(self, y: Vector) -> None: + def _validate_outcome(self, y: Vector, w: Vector) -> None: if ( self.is_classification and not self._supports_multi_class() @@ -328,6 +328,17 @@ def _validate_outcome(self, y: Vector) -> None: f"{self.__class__.__name__} does not support multiclass classification." f" Yet we found {len(np.unique(y))} classes." ) + if self.is_classification: + classes_0 = set(np.unique(y[w == 0])) + for tv in range(self.n_variants): + if set(np.unique(y[w == tv])) != classes_0: + raise ValueError( + f"Variants 0 and {tv} have seen different sets of classification outcomes. Please check your data." + ) + if len(classes_0) == 1: + raise ValueError( + f"There is only one class present in the classification outcome: {classes_0}. Please check your data." + ) def _validate_models(self) -> None: """Validate that the base models are appropriate. diff --git a/metalearners/rlearner.py b/metalearners/rlearner.py index aa5912a..755c559 100644 --- a/metalearners/rlearner.py +++ b/metalearners/rlearner.py @@ -175,7 +175,7 @@ def fit( ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._variants_indices = [] diff --git a/metalearners/slearner.py b/metalearners/slearner.py index 4fc9ad1..5b9818e 100644 --- a/metalearners/slearner.py +++ b/metalearners/slearner.py @@ -153,7 +153,7 @@ def fit( n_jobs_base_learners: int | None = None, ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._fitted_treatments = convert_treatment(w) mock_model = self.nuisance_model_factory[_BASE_MODEL]( diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 98c8aed..361a689 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -69,7 +69,7 @@ def fit( n_jobs_base_learners: int | None = None, ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._treatment_variants_indices = [] diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index dab02f2..10b09b2 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -93,7 +93,7 @@ def fit( n_jobs_base_learners: int | None = None, ) -> Self: self._validate_treatment(w) - self._validate_outcome(y) + self._validate_outcome(y, w) self._treatment_variants_indices = [] diff --git a/tests/test_drlearner.py b/tests/test_drlearner.py index f71dd50..fe3bf18 100644 --- a/tests/test_drlearner.py +++ b/tests/test_drlearner.py @@ -21,6 +21,20 @@ from .conftest import all_sklearn_regressors +def test_adaptive_clipping_smoke(dummy_dataset): + X, y, w = dummy_dataset + ml = DRLearner( + False, + 2, + LinearRegression, + LinearRegression, + LogisticRegression, + n_folds=2, + adaptive_clipping=True, + ) + ml.fit(X, y, w) + + @pytest.mark.parametrize( "treatment_model_factory, onnx_converter", ( diff --git a/tests/test_grid_search.py b/tests/test_grid_search.py index fd953ff..e29d3d3 100644 --- a/tests/test_grid_search.py +++ b/tests/test_grid_search.py @@ -25,7 +25,7 @@ {"base_model": [LinearRegression, LGBMRegressor]}, {"base_model": {"LGBMRegressor": {"n_estimators": [1, 2]}}}, 3, - 3, + ["metalearner", "base_model", "base_model_n_estimators"], ), ( SLearner, @@ -33,7 +33,7 @@ {"base_model": [LogisticRegression, LGBMClassifier]}, {"base_model": 
{"LGBMClassifier": {"n_estimators": [1, 2]}}}, 3, - 3, + ["metalearner", "base_model", "base_model_n_estimators"], ), ( TLearner, @@ -41,7 +41,11 @@ {"variant_outcome_model": [LinearRegression, LGBMRegressor]}, {"variant_outcome_model": {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}}, 4, - 3, + [ + "metalearner", + "variant_outcome_model", + "variant_outcome_model_n_estimators", + ], ), ( XLearner, @@ -58,7 +62,16 @@ "treatment_effect_model": {"LGBMRegressor": {"n_estimators": [1]}}, }, 6, - 8, + [ + "metalearner", + "propensity_model", + "propensity_model_n_estimators", + "variant_outcome_model", + "control_effect_model", + "control_effect_model_n_estimators", + "treatment_effect_model", + "treatment_effect_model_n_estimators", + ], ), ( RLearner, @@ -75,7 +88,15 @@ }, }, 9, - 7, + [ + "metalearner", + "outcome_model", + "propensity_model", + "propensity_model_n_estimators", + "treatment_model", + "treatment_model_learning_rate", + "treatment_model_n_estimators", + ], ), ( DRLearner, @@ -89,7 +110,13 @@ "propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2, 3, 4]}}, }, 4, - 5, + [ + "metalearner", + "propensity_model", + "propensity_model_n_estimators", + "variant_outcome_model", + "treatment_model", + ], ), ], ) @@ -125,7 +152,7 @@ def test_metalearnergridsearch_smoke( gs.fit(X, y, w, X_test, y_test, w_test) assert gs.results_ is not None assert gs.results_.shape[0] == expected_n_configs - assert len(gs.results_.index.names) == expected_index_cols + assert gs.results_.index.names == expected_index_cols train_scores_cols = set( c[6:] for c in list(gs.results_.columns) if c.startswith("train_") diff --git a/tests/test_learner.py b/tests/test_learner.py index f001eda..4aa8072 100644 --- a/tests/test_learner.py +++ b/tests/test_learner.py @@ -706,8 +706,8 @@ def test_validate_treatment_error_different_instantiation(metalearner_prefix): ) def test_validate_outcome_multi_class(metalearner_prefix, success): covariates = np.zeros((20, 1)) - w = np.array([0, 1] * 10) - y = np.array([0, 1] * 8 + [2] * 4) + w = np.array([0] * 10 + [1] * 10) + y = np.array([0, 1, 2, 3, 4] * 4) factory = metalearner_factory(metalearner_prefix) learner = factory( diff --git a/tests/test_metalearner.py b/tests/test_metalearner.py index 25165f8..dbf27de 100644 --- a/tests/test_metalearner.py +++ b/tests/test_metalearner.py @@ -1059,3 +1059,58 @@ def test_n_jobs_base_learners(implementation, rng): np.testing.assert_allclose(ml.predict(X, False), ml_2.predict(X, False)) np.testing.assert_allclose(ml.predict(X, True), ml_2.predict(X, True)) + + +@pytest.mark.parametrize( + "implementation", + [TLearner, SLearner, XLearner, RLearner, DRLearner], +) +@pytest.mark.parametrize("use_pandas", [False, True]) +def test_validate_outcome_one_class(implementation, use_pandas, rng): + X = rng.standard_normal((10, 2)) + y = np.zeros(10) + w = rng.integers(0, 2, 10) + if use_pandas: + X = pd.DataFrame(X) + y = pd.Series(y) + w = pd.Series(w) + + ml = implementation( + True, + 2, + LogisticRegression, + LinearRegression, + LogisticRegression, + ) + with pytest.raises( + ValueError, + match="There is only one class present in the classification outcome", + ): + ml.fit(X, y, w) + + +@pytest.mark.parametrize( + "implementation", + [TLearner, SLearner, XLearner, RLearner, DRLearner], +) +@pytest.mark.parametrize("use_pandas", [False, True]) +def test_validate_outcome_different_classes(implementation, use_pandas, rng): + X = rng.standard_normal((4, 2)) + y = np.array([0, 1, 0, 0]) + w = np.array([0, 0, 1, 1]) + if use_pandas: + X = 
pd.DataFrame(X) + y = pd.Series(y) + w = pd.Series(w) + + ml = implementation( + True, + 2, + LogisticRegression, + LinearRegression, + LogisticRegression, + ) + with pytest.raises( + ValueError, match="have seen different sets of classification outcomes." + ): + ml.fit(X, y, w)
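
To illustrate the new outcome validation these tests exercise, here is a minimal sketch (directly mirroring ``test_validate_outcome_different_classes``) of the error a user would now see; the tiny data set is purely illustrative.

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from metalearners import DRLearner

X = np.random.default_rng(0).standard_normal((4, 2))
y = np.array([0, 1, 0, 0])  # variant 0 has seen classes {0, 1}, variant 1 only {0}
w = np.array([0, 0, 1, 1])

ml = DRLearner(
    True,  # is_classification
    2,  # n_variants
    LogisticRegression,
    LinearRegression,
    LogisticRegression,
)
try:
    ml.fit(X, y, w)
except ValueError as e:
    # "Variants 0 and 1 have seen different sets of classification outcomes. ..."
    print(e)
```

An analogous ``ValueError`` ("There is only one class present in the classification outcome") is raised when all observed outcomes belong to a single class, as exercised by ``test_validate_outcome_one_class``.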