Skip to content

Commit

Permalink
Merge branch 'main' into survival_example
Browse files Browse the repository at this point in the history
  • Loading branch information
FrancescMartiEscofetQC committed Jul 12, 2024
2 parents dc467f5 + 3d8f033 commit ae2d825
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 49 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/package.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
name: Package
on: [push]
on:
push:
release:
types:
- published

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand All @@ -26,6 +30,10 @@ jobs:
name: Upload to PyPI
needs: [build]
runs-on: ubuntu-latest
permissions:
id-token: write
contents: write
environment: pypi
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- uses: actions/download-artifact@v4
Expand Down
15 changes: 13 additions & 2 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,25 @@
Changelog
=========

0.6.0 (2024-06-**)
0.6.1 (2024-07-xx)
------------------

**New features**

* Add optional ``adaptive_clipping`` parameter to :class:`metalearners.DRLearner`.

**Other changes**

* Change the order of the index columns in ``MetaLearnerGridSearch.results_``.

0.6.0 (2024-07-08)
------------------

**New features**

* Implement :class:`metalearners.grid_search.MetaLearnerGridSearch`.

* Add ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and
* Add a ``scoring`` parameter to :meth:`metalearners.metalearner.MetaLearner.evaluate` and
implement the abstract method for the :class:`metalearners.XLearner` and
:class:`metalearners.DRLearner`.

Expand Down
41 changes: 14 additions & 27 deletions docs/examples/example_lime.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -217,17 +217,21 @@
"source": [
"### Generating lime plots\n",
"\n",
"``lime`` will expect a function which consumes an ``X`` and returns\n",
"``lime`` will expect a function which consumes a ``np.ndarray`` ``X`` and returns\n",
"a one-dimensional vector of the same length as ``X``. We'll have to\n",
"adapt the {meth}`~metalearners.rlearner.RLearner.predict` method of\n",
"our {class}`~metalearners.rlearner.RLearner` in two ways:\n",
"our {class}`~metalearners.rlearner.RLearner` in three ways:\n",
"\n",
"* We need to pass a value for the necessary parameter ``is_oos`` to {meth}`~metalearners.rlearner.RLearner.predict`.\n",
"\n",
"* We need to reshape the output of\n",
" {meth}`~metalearners.rlearner.RLearner.predict` to be one-dimensional. This\n",
" we can easily achieve via {func}`metalearners.utils.simplify_output`.\n",
"\n",
"* We need to reconvert the ``np.ndarray`` to a ``pd.DataFrame`` to work with categoricals\n",
" and specify the correct categories so the categorical codes are the same (which are used internally in LightGBM),\n",
" see [this issue](https://github.com/microsoft/LightGBM/issues/5162) for more context.\n",
"\n",
"This we can do as follows:"
]
},
Expand All @@ -244,7 +248,11 @@
"from metalearners.utils import simplify_output\n",
"\n",
"def predict(X):\n",
" return simplify_output(rlearner.predict(X, is_oos=True))"
" X_pd = pd.DataFrame(X, copy=True)\n",
" for c in X_pd.columns:\n",
" # This line sets the cat.categories correctly (even if not all are present in X)\n",
" X_pd[c] = X_pd[c].astype(df[feature_columns].iloc[:, c].dtype)\n",
" return simplify_output(rlearner.predict(X_pd, is_oos=True))"
]
},
{
Expand All @@ -254,26 +262,7 @@
"where we set ``is_oos=True`` since ``lime`` will call\n",
"{meth}`~metalearners.rlearner.RLearner.predict`\n",
"with various inputs which will not be able to be recognized as\n",
"in-sample data.\n",
"\n",
"Since ``lime`` expects ``numpy`` datastructures, we'll have to\n",
"manually encode the categorical features of our ``pandas`` data\n",
"structure, see [this issue](https://github.com/microsoft/LightGBM/issues/5162) for more context."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"X = df[feature_columns].copy()\n",
"for categorical_feature_column in categorical_feature_columns:\n",
" X[categorical_feature_column] = X[categorical_feature_column].cat.codes"
"in-sample data."
]
},
{
Expand Down Expand Up @@ -332,10 +321,8 @@
"from lime.lime_tabular import LimeTabularExplainer\n",
"from lime.submodular_pick import SubmodularPick\n",
"\n",
"X = X.to_numpy()\n",
"\n",
"explainer = LimeTabularExplainer(\n",
" X,\n",
" df[feature_columns].to_numpy(),\n",
" feature_names=feature_columns,\n",
" categorical_features=categorical_feature_indices,\n",
" categorical_names=categorical_names,\n",
Expand All @@ -345,7 +332,7 @@
")\n",
"\n",
"sp = SubmodularPick(\n",
" data=X,\n",
" data=df[feature_columns].to_numpy(),\n",
" explainer=explainer,\n",
" predict_fn=predict,\n",
" method=\"sample\",\n",
Expand Down
58 changes: 56 additions & 2 deletions metalearners/drlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,16 @@
from joblib import Parallel, delayed
from typing_extensions import Self

from metalearners._typing import Matrix, OosMethod, Scoring, Vector
from metalearners._typing import (
Features,
Matrix,
ModelFactory,
OosMethod,
Params,
Scoring,
Vector,
_ScikitModel,
)
from metalearners._utils import (
clip_element_absolute_value_to_epsilon,
get_one,
Expand All @@ -15,7 +24,7 @@
index_matrix,
validate_valid_treatment_variant_not_control,
)
from metalearners.cross_fit_estimator import OVERALL
from metalearners.cross_fit_estimator import OVERALL, CrossFitEstimator
from metalearners.metalearner import (
NUISANCE,
PROPENSITY_MODEL,
Expand Down Expand Up @@ -50,6 +59,9 @@ class DRLearner(_ConditionalAverageOutcomeMetaLearner):
* ``"treatment_model"`` which estimates :math:`\mathbb{E}[Y(k) - Y(0) | X]`
If ``adaptive_clipping`` is set to ``True``, then the pseudo outcomes are computed using
adaptive propensity clipping described in section 4.1, equation *DR-Switch* of
`Mahajan et al. (2024) <https://arxiv.org/pdf/2211.01939>`_.
"""

@classmethod
Expand Down Expand Up @@ -82,6 +94,40 @@ def _supports_multi_treatment(cls) -> bool:
def _supports_multi_class(cls) -> bool:
return False

def __init__(
    self,
    is_classification: bool,
    n_variants: int,
    nuisance_model_factory: ModelFactory | None = None,
    treatment_model_factory: ModelFactory | None = None,
    propensity_model_factory: type[_ScikitModel] | None = None,
    nuisance_model_params: Params | dict[str, Params] | None = None,
    treatment_model_params: Params | dict[str, Params] | None = None,
    propensity_model_params: Params | None = None,
    fitted_nuisance_models: dict[str, list[CrossFitEstimator]] | None = None,
    fitted_propensity_model: CrossFitEstimator | None = None,
    feature_set: Features | dict[str, Features] | None = None,
    n_folds: int | dict[str, int] = 10,
    random_state: int | None = None,
    adaptive_clipping: bool = False,
):
    """Initialize the learner and remember the ``adaptive_clipping`` switch.

    All arguments other than ``adaptive_clipping`` are forwarded unchanged,
    by keyword, to the parent initializer. ``adaptive_clipping`` is only
    stored on the instance here; it is read later when the pseudo outcomes
    are computed.
    """
    # Keyword order mirrors this signature; since every argument is passed
    # by name, the order has no effect on the call.
    super().__init__(
        is_classification=is_classification,
        n_variants=n_variants,
        nuisance_model_factory=nuisance_model_factory,
        treatment_model_factory=treatment_model_factory,
        propensity_model_factory=propensity_model_factory,
        nuisance_model_params=nuisance_model_params,
        treatment_model_params=treatment_model_params,
        propensity_model_params=propensity_model_params,
        fitted_nuisance_models=fitted_nuisance_models,
        fitted_propensity_model=fitted_propensity_model,
        feature_set=feature_set,
        n_folds=n_folds,
        random_state=random_state,
    )
    # Set after the parent initializer has run, as in the original.
    self.adaptive_clipping = adaptive_clipping

def fit(
self,
X: Matrix,
Expand Down Expand Up @@ -317,4 +363,12 @@ def _pseudo_outcome(
- y0_estimate
)

if self.adaptive_clipping:
t_pseudo_outcome = y1_estimate - y0_estimate
pseudo_outcome = np.where(
propensity_estimates.min(axis=1) < epsilon,
t_pseudo_outcome,
pseudo_outcome,
)

return pseudo_outcome
23 changes: 13 additions & 10 deletions metalearners/grid_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ def _format_results(results: Sequence[_GSResult]) -> pd.DataFrame:
for result in results:
row: dict[str, str | int | float] = {}
row["metalearner"] = result.metalearner.__class__.__name__
nuisance_models = (
nuisance_models = sorted(
set(result.metalearner.nuisance_model_specifications().keys())
- result.metalearner._prefitted_nuisance_models
)
treatment_models = set(
result.metalearner.treatment_model_specifications().keys()
treatment_models = sorted(
set(result.metalearner.treatment_model_specifications().keys())
)
for model_kind in nuisance_models:
row[model_kind] = result.metalearner.nuisance_model_factory[
Expand All @@ -115,13 +115,16 @@ def _format_results(results: Sequence[_GSResult]) -> pd.DataFrame:
row[f"test_{name}"] = value
rows.append(row)
df = pd.DataFrame(rows)
index_columns = [
c
for c in df.columns
if not c.endswith("_time")
and not c.startswith("train_")
and not c.startswith("test_")
]
sorted_cols = sorted(df.columns)
index_columns = ["metalearner"]
for model_kind in nuisance_models:
for c in sorted_cols:
if c.startswith(model_kind):
index_columns.append(c)
for model_kind in treatment_models:
for c in sorted_cols:
if c.startswith(model_kind):
index_columns.append(c)
df = df.set_index(index_columns)
return df

Expand Down
20 changes: 20 additions & 0 deletions tests/test_drlearner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) QuantCo 2024-2024
# SPDX-License-Identifier: BSD-3-Clause

from sklearn.linear_model import LinearRegression, LogisticRegression

from metalearners.drlearner import DRLearner


def test_adaptive_clipping_smoke(dummy_dataset):
    """Smoke test: a ``DRLearner`` with ``adaptive_clipping=True`` can be fitted."""
    covariates, outcomes, treatments = dummy_dataset
    # Arguments are spelled out by keyword; the names follow the
    # ``DRLearner.__init__`` signature.
    learner = DRLearner(
        is_classification=False,
        n_variants=2,
        nuisance_model_factory=LinearRegression,
        treatment_model_factory=LinearRegression,
        propensity_model_factory=LogisticRegression,
        n_folds=2,
        adaptive_clipping=True,
    )
    learner.fit(covariates, outcomes, treatments)
41 changes: 34 additions & 7 deletions tests/test_grid_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,27 @@
{"base_model": [LinearRegression, LGBMRegressor]},
{"base_model": {"LGBMRegressor": {"n_estimators": [1, 2]}}},
3,
3,
["metalearner", "base_model", "base_model_n_estimators"],
),
(
SLearner,
True,
{"base_model": [LogisticRegression, LGBMClassifier]},
{"base_model": {"LGBMClassifier": {"n_estimators": [1, 2]}}},
3,
3,
["metalearner", "base_model", "base_model_n_estimators"],
),
(
TLearner,
False,
{"variant_outcome_model": [LinearRegression, LGBMRegressor]},
{"variant_outcome_model": {"LGBMRegressor": {"n_estimators": [1, 2, 3]}}},
4,
3,
[
"metalearner",
"variant_outcome_model",
"variant_outcome_model_n_estimators",
],
),
(
XLearner,
Expand All @@ -58,7 +62,16 @@
"treatment_effect_model": {"LGBMRegressor": {"n_estimators": [1]}},
},
6,
8,
[
"metalearner",
"propensity_model",
"propensity_model_n_estimators",
"variant_outcome_model",
"control_effect_model",
"control_effect_model_n_estimators",
"treatment_effect_model",
"treatment_effect_model_n_estimators",
],
),
(
RLearner,
Expand All @@ -75,7 +88,15 @@
},
},
9,
7,
[
"metalearner",
"outcome_model",
"propensity_model",
"propensity_model_n_estimators",
"treatment_model",
"treatment_model_learning_rate",
"treatment_model_n_estimators",
],
),
(
DRLearner,
Expand All @@ -89,7 +110,13 @@
"propensity_model": {"LGBMClassifier": {"n_estimators": [1, 2, 3, 4]}},
},
4,
5,
[
"metalearner",
"propensity_model",
"propensity_model_n_estimators",
"variant_outcome_model",
"treatment_model",
],
),
],
)
Expand Down Expand Up @@ -125,7 +152,7 @@ def test_metalearnergridsearch_smoke(
gs.fit(X, y, w, X_test, y_test, w_test)
assert gs.results_ is not None
assert gs.results_.shape[0] == expected_n_configs
assert len(gs.results_.index.names) == expected_index_cols
assert gs.results_.index.names == expected_index_cols

train_scores_cols = set(
c[6:] for c in list(gs.results_.columns) if c.startswith("train_")
Expand Down

0 comments on commit ae2d825

Please sign in to comment.