py-why · kbattocchi · Mar 21, 2023 · Apr 26, 2023 · May 3, 2023 · May 3, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,6 +12,11 @@ on:
         default: ''
         type: string
 
+# Allow one active run per PR branch: a newer run cancels the in-progress one (non-PR runs get a unique run_id group and are never canceled)
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 # Precompute the ref if the workflow was triggered by a workflow dispatch rather than copying this logic repeatedly
 env:
   ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }}
@@ -72,24 +77,6 @@ jobs:
       buildNbs: ${{ steps.eval.outputs.buildNbs }}
       testCode: ${{ steps.eval.outputs.testCode }}
 
-  lint:
-    name: Lint code
-    needs: [eval]
-    if: ${{ needs.eval.outputs.testCode == 'True' }}
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v3
-      name: Checkout repository
-      with:
-        ref: ${{ env.ref }}
-    - uses: actions/setup-python@v4
-      name: Setup Python
-      with:
-        python-version: 3.9
-    - run: python -m pip install --upgrade pip && pip install --upgrade setuptools
-      name: Ensure latest pip and setuptools
-    - run: 'pip install pycodestyle && pycodestyle econml'
-
   notebooks:
     name: Run notebooks
     needs: [eval]
@@ -98,6 +85,8 @@ jobs:
     strategy:
       matrix:
         kind: [except customer scenarios, customer scenarios]
+        nbconvert: ['nbconvert', '"nbconvert<7.4"']
+        ipykernel: ['ipykernel', '"ipykernel<6.23"']
         include:
         - kind: "except customer scenarios"
           extras: "[tf,plt]"
@@ -124,9 +113,10 @@ jobs:
     - run: sudo apt-get -yq install graphviz
       name: Install graphviz
       if: ${{ matrix.install_graphviz }}
-    - run: pip install -e .${{ matrix.extras }}
+    # Append -v to pip install when debug logging is enabled: runner.debug (1 in debug mode) indexes the ["", "-v"] list
+    - run: pip install -e .${{ matrix.extras }} ${{ fromJSON('["","-v"]')[runner.debug] }}
       name: Install econml
-    - run: pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm
+    - run: pip install pytest pytest-runner jupyter jupyter-client ${{ matrix.nbconvert }} ${{ matrix.ipykernel }} nbformat seaborn xgboost tqdm
       name: Install test and notebook requirements
     - run: pip list
       name: List installed packages
@@ -135,96 +125,4 @@ jobs:
       env:
         PYTEST_ADDOPTS: '-m "notebook"'
         NOTEBOOK_DIR_PATTERN: ${{ matrix.pattern }}
-
-  tests:
-    name: "Run tests"
-    needs: [eval]
-    if: ${{ needs.eval.outputs.testCode == 'True' }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: [3.6, 3.7, 3.8, 3.9]
-        kind: [serial, other, dml, main, treatment]
-        exclude:
-          # Serial tests fail randomly on mac sometimes, so we don't run them there
-          - os: macos-latest
-            kind: serial
-          # Python 3.6 isn't supported on ubuntu-latest
-          - os: ubuntu-latest
-            python-version: 3.6
-
-        # Assign the correct package and testing options for each kind of test
-        include:
-          - kind: serial
-            opts: '-m "serial" -n 1'
-            extras: "[tf,plt]"
-          - kind: other
-            opts: '-m "cate_api" -n auto'
-            extras: "[tf,plt]"
-          - kind: dml
-            opts: '-m "dml"'
-            extras: "[tf,plt]"
-          - kind: main
-            opts: '-m "not (notebook or automl or dml or serial or cate_api or treatment_featurization)" -n 2'
-            extras: "[tf,plt,dowhy]"
-          - kind: treatment
-            opts: '-m "treatment_featurization" -n auto'
-            extras: "[tf,plt]"
-      fail-fast: false    
-    runs-on: ${{ matrix.os }}
-    steps:
-    - uses: actions/checkout@v3
-      name: Checkout repository
-      with:
-        ref: ${{ env.ref }}
-    - uses: actions/setup-python@v4
-      name: Setup Python
-      with:
-        python-version: ${{ matrix.python-version }}
-    - run: python -m pip install --upgrade pip && pip install --upgrade setuptools
-      name: Ensure latest pip and setuptools
-    - run: pip install -e .${{ matrix.extras }}
-      name: Install econml
-    - run: pip install pytest pytest-runner coverage
-      name: Install pytest
-    - run: python setup.py pytest
-      name: Run tests
-      env:
-        PYTEST_ADDOPTS: ${{ matrix.opts }}
-        COVERAGE_PROCESS_START: 'setup.cfg'
-    # todo: publish test results, coverage info
-
-  build:
-    name: Build package
-    needs: [eval]
-    if: ${{ needs.eval.outputs.testCode == 'True' }}
-    uses: ./.github/workflows/publish-package.yml
-    with:
-      publish: false
-      repository: testpypi
-      # don't have access to env context here for some reason
-      ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }}
-
-  docs:
-    name: Build documentation
-    needs: [eval]
-    if: ${{ needs.eval.outputs.buildDocs == 'True' }}
-    uses: ./.github/workflows/publish-documentation.yml
-    with:
-      publish: false
-      environment: test
-      # don't have access to env context here for some reason
-      ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }}
-
-  verify:
-    name: Verify CI checks
-    needs: [lint, notebooks, tests, build, docs]
-    if: always()
-    runs-on: ubuntu-latest
-    steps:
-    - run: exit 1
-      name: At least one check failed or was cancelled
-      if: ${{ !(success()) }}
-    - run: exit 0
-      name: All checks passed
-      if: ${{ success() }}
+
diff --git a/econml/_ensemble/_ensemble.py b/econml/_ensemble/_ensemble.py
@@ -158,8 +158,8 @@ def _partition_estimators(n_estimators, n_jobs):
 
     # Partition estimators between jobs
     n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs,
-                                   dtype=np.int)
-    n_estimators_per_job[:n_estimators % n_jobs] += 1
+                                   dtype=int)
+    n_estimators_per_job[: n_estimators % n_jobs] += 1
     starts = np.cumsum(n_estimators_per_job)
 
     return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()
diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py
@@ -899,7 +899,7 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
                     nuisances = [np.zeros((n_iters * n_splits,) + nuis.shape) for nuis in nuisance_temp]
 
                 for it, nuis in enumerate(nuisance_temp):
-                    nuisances[it][i * n_iters + j] = nuis
+                    nuisances[it][j * n_iters + i] = nuis
 
         for it in range(len(nuisances)):
             nuisances[it] = np.mean(nuisances[it], axis=0)

diff --git a/econml/cate_interpreter/_interpreters.py b/econml/cate_interpreter/_interpreters.py
@@ -4,7 +4,6 @@
 import abc
 import numbers
 import numpy as np
-from packaging import version
 import sklearn
 from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
 from sklearn.utils import check_array
@@ -151,7 +150,7 @@ def __init__(self, *,
         self.include_uncertainty = include_model_uncertainty
         self.uncertainty_level = uncertainty_level
         self.uncertainty_only_on_leaves = uncertainty_only_on_leaves
-        self.criterion = "squared_error" if version.parse(sklearn.__version__) >= version.parse("1.0") else "mse"
+        self.criterion = "squared_error"
         self.splitter = splitter
         self.max_depth = max_depth
         self.min_samples_split = min_samples_split

diff --git a/econml/data/dynamic_panel_dgp.py b/econml/data/dynamic_panel_dgp.py
@@ -135,7 +135,7 @@ def simulate_residuals(ind):
 
 
 def simulate_residuals_all(res_df):
-    res_df_new = res_df.copy(deep=True)
+    res_df_new = res_df.astype(dtype='float64', copy=True, errors='raise')
     for i in range(res_df.shape[1]):
         res_df_new.iloc[:, i] = simulate_residuals(i)
     # demean the new residual again

diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py
@@ -38,51 +38,6 @@
 from joblib import Parallel, delayed
 
 
-# TODO: once we drop support for sklearn < 1.0, we can remove this
-def _add_normalize(to_wrap):
-    """
-    Add a fictitious "normalize" argument to linear model initializer signatures.
-
-    This is necessary for their get_params to play nicely with some other sklearn-internal methods.
-
-    Note that directly adding a **params argument to the ordinary initializer will not work,
-    because get_params explicitly looks only at the initializer signature arguments that are not
-    varargs or varkeywords, so we need to modify the signature of the initializer to include the
-    "normalize" argument.
-    """
-    # if we're decorating a class, just update the __init__ method,
-    # so that the result is still a class instead of a wrapper method
-    if isinstance(to_wrap, type):
-        import sklearn
-        from packaging import version
-
-        if version.parse(sklearn.__version__) >= version.parse("1.0"):
-            # normalize was deprecated or removed; don't need to do anything
-            return to_wrap
-
-        else:
-            from inspect import Parameter, signature
-            from functools import wraps
-
-            old_init = to_wrap.__init__
-
-            @wraps(old_init)
-            def new_init(self, *args, normalize=False, **kwargs):
-                if normalize is not False:
-                    warnings.warn("normalize is deprecated and will be ignored", stacklevel=2)
-                return old_init(self, *args, **kwargs)
-
-            sig = signature(old_init)
-            sig = sig.replace(parameters=[*sig.parameters.values(),
-                                          Parameter("normalize", kind=Parameter.KEYWORD_ONLY, default=False)])
-
-            new_init.__signature__ = sig
-            to_wrap.__init__ = new_init
-            return to_wrap
-    else:
-        raise ValueError("This decorator was applied to a method, but is intended to be applied only to types.")
-
-
 def _weighted_check_cv(cv=5, y=None, classifier=False, random_state=None):
     cv = 5 if cv is None else cv
     if isinstance(cv, numbers.Integral):
@@ -176,7 +131,6 @@ def _fit_weighted_linear_model(self, X, y, sample_weight, check_input=None):
             super().fit(**fit_params)
 
 
-@_add_normalize
 class WeightedLasso(WeightedModelMixin, Lasso):
     """Version of sklearn Lasso that accepts weights.
 
@@ -282,7 +236,6 @@ def fit(self, X, y, sample_weight=None, check_input=True):
         return self
 
 
-@_add_normalize
 class WeightedMultiTaskLasso(WeightedModelMixin, MultiTaskLasso):
     """Version of sklearn MultiTaskLasso that accepts weights.
 
@@ -372,7 +325,6 @@ def fit(self, X, y, sample_weight=None):
         return self
 
 
-@_add_normalize
 class WeightedLassoCV(WeightedModelMixin, LassoCV):
     """Version of sklearn LassoCV that accepts weights.
 
@@ -491,7 +443,6 @@ def fit(self, X, y, sample_weight=None):
         return self
 
 
-@_add_normalize
 class WeightedMultiTaskLassoCV(WeightedModelMixin, MultiTaskLassoCV):
     """Version of sklearn MultiTaskLassoCV that accepts weights.
 
@@ -631,7 +582,6 @@ def _get_theta_coefs_and_tau_sq(i, X, sample_weight, alpha_cov, n_alphas_cov, ma
     return coefs, tausq
 
 
-@_add_normalize
 class DebiasedLasso(WeightedLasso):
     """Debiased Lasso model.
 
@@ -977,7 +927,6 @@ def _get_unscaled_coef_var(self, X, theta_hat, sample_weight):
         return _unscaled_coef_var
 
 
-@_add_normalize
 class MultiOutputDebiasedLasso(MultiOutputRegressor):
     """Debiased MultiOutputLasso model.
 

diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py
@@ -1095,6 +1095,7 @@ def test_nuisance_scores(self):
                 est.fit(y, T, X=X, W=W)
                 assert len(est.nuisance_scores_t) == len(est.nuisance_scores_y) == mc_iters
                 assert len(est.nuisance_scores_t[0]) == len(est.nuisance_scores_y[0]) == cv
+                est.score(y, T, X=X, W=W)
 
     def test_categories(self):
         dmls = [LinearDML, SparseLinearDML]

diff --git a/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb b/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb
@@ -598,7 +598,7 @@
     "X = Xy.drop(columns = 'SalePrice')\n",
     "X_ohe = (\n",
     "    X\n",
-    "    .pipe(pd.get_dummies, prefix_sep = '_OHE_', columns = categorical)\n",
+    "    .pipe(pd.get_dummies, prefix_sep = '_OHE_', columns = categorical, dtype='uint8')\n",
     ")\n",
     "y = Xy['SalePrice']"
    ]

diff --git a/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb b/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb
@@ -432,7 +432,7 @@
    "outputs": [],
    "source": [
     "categorical = []\n",
-    "for col, value in attritionXData.iteritems():\n",
+    "for col, value in attritionXData.items():\n",
     "    if value.dtype == \"object\":\n",
     "        categorical.append(col)\n",
     "\n",

diff --git a/setup.cfg b/setup.cfg
@@ -34,12 +34,12 @@ packages = find_namespace:
 install_requires =
     numpy
     scipy > 1.4.0
-    scikit-learn > 0.22.0, < 1.3
+    scikit-learn >= 1.0, < 1.3
     sparse
     joblib >= 0.13.0
     statsmodels >= 0.10
     pandas
-    shap >= 0.38.1, < 0.41.0
+    shap >= 0.38.1, < 0.42.0
     lightgbm
 test_suite = econml.tests
 tests_require =
@@ -58,6 +58,8 @@ tf =
     tensorflow > 1.10, < 2.3;python_version < '3.9'
     ; Version capped due to tensorflow incompatibility
     protobuf < 4
+    ; Version capped due to tensorflow incompatibility
+    numpy < 1.24
 plt =
     graphviz
     ; Version capped due to shap incompatibility
@@ -70,6 +72,8 @@ all =
     tensorflow > 1.10, < 2.3
     ; Version capped due to tensorflow incompatibility
     protobuf < 4
+    ; Version capped due to tensorflow incompatibility
+    numpy < 1.24
     ; Version capped due to shap incompatibility
     matplotlib < 3.6.0
     dowhy < 0.9