From ab9c7499ed610234417d4cc3366bfb17ae8bd853 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Tue, 21 Mar 2023 00:34:48 -0400 Subject: [PATCH 1/9] Fix status checks in CI workflow Signed-off-by: Keith Battocchi --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 82ba25bc6..0e67498c7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -224,7 +224,7 @@ jobs: steps: - run: exit 1 name: At least one check failed or was cancelled - if: ${{ !(success()) }} + if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }} - run: exit 0 name: All checks passed - if: ${{ success() }} + if: ${{ !(contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) }} From 684362fb73dcbada7b2f9a16e6e405b99d827bac Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 26 Apr 2023 12:38:18 -0400 Subject: [PATCH 2/9] Fix #760 Signed-off-by: Keith Battocchi --- econml/_ortho_learner.py | 2 +- econml/tests/test_dml.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index fdc9e7693..39a300ade 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -899,7 +899,7 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): nuisances = [np.zeros((n_iters * n_splits,) + nuis.shape) for nuis in nuisance_temp] for it, nuis in enumerate(nuisance_temp): - nuisances[it][i * n_iters + j] = nuis + nuisances[it][j * n_iters + i] = nuis for it in range(len(nuisances)): nuisances[it] = np.mean(nuisances[it], axis=0) diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py index 8105f7ec7..57b5c3ec4 100644 --- a/econml/tests/test_dml.py +++ b/econml/tests/test_dml.py @@ -1095,6 +1095,7 @@ def test_nuisance_scores(self): est.fit(y, T, X=X, W=W) assert len(est.nuisance_scores_t) == len(est.nuisance_scores_y) == mc_iters 
assert len(est.nuisance_scores_t[0]) == len(est.nuisance_scores_y[0]) == cv + est.score(y, T, X=X, W=W) def test_categories(self): dmls = [LinearDML, SparseLinearDML] From 26f529b283e64ad24d38f27842f8f3221268dff8 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 3 May 2023 12:42:15 -0400 Subject: [PATCH 3/9] Enable compatibility with pandas 2.0 Signed-off-by: Keith Battocchi --- econml/data/dynamic_panel_dgp.py | 2 +- .../Causal Interpretation for Ames Housing Price.ipynb | 2 +- .../Causal Interpretation for Employee Attrition Dataset.ipynb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/econml/data/dynamic_panel_dgp.py b/econml/data/dynamic_panel_dgp.py index 82b842912..cb7ffa17c 100644 --- a/econml/data/dynamic_panel_dgp.py +++ b/econml/data/dynamic_panel_dgp.py @@ -135,7 +135,7 @@ def simulate_residuals(ind): def simulate_residuals_all(res_df): - res_df_new = res_df.copy(deep=True) + res_df_new = res_df.astype(dtype='float64', copy=True, errors='raise') for i in range(res_df.shape[1]): res_df_new.iloc[:, i] = simulate_residuals(i) # demean the new residual again diff --git a/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb b/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb index 3b7b7d936..504d9e0f7 100644 --- a/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb +++ b/notebooks/Solutions/Causal Interpretation for Ames Housing Price.ipynb @@ -598,7 +598,7 @@ "X = Xy.drop(columns = 'SalePrice')\n", "X_ohe = (\n", " X\n", - " .pipe(pd.get_dummies, prefix_sep = '_OHE_', columns = categorical)\n", + " .pipe(pd.get_dummies, prefix_sep = '_OHE_', columns = categorical, dtype='uint8')\n", ")\n", "y = Xy['SalePrice']" ] diff --git a/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb b/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb index 24f00b8c2..94ba335da 100644 --- a/notebooks/Solutions/Causal Interpretation for 
Employee Attrition Dataset.ipynb +++ b/notebooks/Solutions/Causal Interpretation for Employee Attrition Dataset.ipynb @@ -432,7 +432,7 @@ "outputs": [], "source": [ "categorical = []\n", - "for col, value in attritionXData.iteritems():\n", + "for col, value in attritionXData.items():\n", " if value.dtype == \"object\":\n", " categorical.append(col)\n", "\n", From f37dd76653d275b8ed6f016c4f462e60c8dc44c4 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 3 May 2023 13:30:18 -0400 Subject: [PATCH 4/9] Bump supported shap version limit Signed-off-by: Keith Battocchi --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 2e0ca2c37..440cad679 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,7 +39,7 @@ install_requires = joblib >= 0.13.0 statsmodels >= 0.10 pandas - shap >= 0.38.1, < 0.41.0 + shap >= 0.38.1, < 0.42.0 lightgbm test_suite = econml.tests tests_require = From 8b2a47db2efe989344d519a5a3c5e785c9eb1ab9 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 3 May 2023 15:58:29 -0400 Subject: [PATCH 5/9] Use verbose pip install when debugging workflows Signed-off-by: Keith Battocchi --- .github/workflows/ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e67498c7..413635a8f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -124,7 +124,8 @@ jobs: - run: sudo apt-get -yq install graphviz name: Install graphviz if: ${{ matrix.install_graphviz }} - - run: pip install -e .${{ matrix.extras }} + # Add verbose flag to pip installation if in debug mode + - run: pip install -e .${{ matrix.extras }} ${{ fromJSON('["","-v"]')[runner.debug] }} name: Install econml - run: pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm name: Install test and notebook requirements @@ -183,7 +184,8 @@ jobs: python-version: ${{ matrix.python-version }} - run: python -m pip install 
--upgrade pip && pip install --upgrade setuptools name: Ensure latest pip and setuptools - - run: pip install -e .${{ matrix.extras }} + # Add verbose flag to pip installation if in debug mode + - run: pip install -e .${{ matrix.extras }} ${{ fromJSON('["","-v"]')[runner.debug] }} name: Install econml - run: pip install pytest pytest-runner coverage name: Install pytest From 4e1845bbbaad0dc4a98556e01ef74ebbf24d754f Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Fri, 5 May 2023 17:33:50 -0400 Subject: [PATCH 6/9] Fix numpy 1.24 compatibility Signed-off-by: Keith Battocchi --- econml/_ensemble/_ensemble.py | 4 ++-- setup.cfg | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/econml/_ensemble/_ensemble.py b/econml/_ensemble/_ensemble.py index ff833f9ec..cdc23da4c 100644 --- a/econml/_ensemble/_ensemble.py +++ b/econml/_ensemble/_ensemble.py @@ -158,8 +158,8 @@ def _partition_estimators(n_estimators, n_jobs): # Partition estimators between jobs n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs, - dtype=np.int) - n_estimators_per_job[:n_estimators % n_jobs] += 1 + dtype=int) + n_estimators_per_job[: n_estimators % n_jobs] += 1 starts = np.cumsum(n_estimators_per_job) return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist() diff --git a/setup.cfg b/setup.cfg index 440cad679..70529d9a0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -58,6 +58,8 @@ tf = tensorflow > 1.10, < 2.3;python_version < '3.9' ; Version capped due to tensorflow incompatibility protobuf < 4 + ; Version capped due to tensorflow incompatibility + numpy < 1.24 plt = graphviz ; Version capped due to shap incompatibility @@ -70,6 +72,8 @@ all = tensorflow > 1.10, < 2.3 ; Version capped due to tensorflow incompatibility protobuf < 4 + ; Version capped due to tensorflow incompatibility + numpy < 1.24 ; Version capped due to shap incompatibility matplotlib < 3.6.0 dowhy < 0.9 From 1fc53f675a89a0eef54733570a6d8b0e22391ac5 Mon Sep 17 00:00:00 2001 From: Keith 
Battocchi Date: Tue, 28 Mar 2023 18:00:46 -0400 Subject: [PATCH 7/9] Drop support for sklearn<1.0 Signed-off-by: Keith Battocchi --- econml/cate_interpreter/_interpreters.py | 3 +- econml/sklearn_extensions/linear_model.py | 51 ----------------------- setup.cfg | 2 +- 3 files changed, 2 insertions(+), 54 deletions(-) diff --git a/econml/cate_interpreter/_interpreters.py b/econml/cate_interpreter/_interpreters.py index 82a47ef0c..67dd78888 100644 --- a/econml/cate_interpreter/_interpreters.py +++ b/econml/cate_interpreter/_interpreters.py @@ -4,7 +4,6 @@ import abc import numbers import numpy as np -from packaging import version import sklearn from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier from sklearn.utils import check_array @@ -151,7 +150,7 @@ def __init__(self, *, self.include_uncertainty = include_model_uncertainty self.uncertainty_level = uncertainty_level self.uncertainty_only_on_leaves = uncertainty_only_on_leaves - self.criterion = "squared_error" if version.parse(sklearn.__version__) >= version.parse("1.0") else "mse" + self.criterion = "squared_error" self.splitter = splitter self.max_depth = max_depth self.min_samples_split = min_samples_split diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py index a7f5d029c..da2218473 100644 --- a/econml/sklearn_extensions/linear_model.py +++ b/econml/sklearn_extensions/linear_model.py @@ -38,51 +38,6 @@ from joblib import Parallel, delayed -# TODO: once we drop support for sklearn < 1.0, we can remove this -def _add_normalize(to_wrap): - """ - Add a fictitious "normalize" argument to linear model initializer signatures. - - This is necessary for their get_params to play nicely with some other sklearn-internal methods. 
- - Note that directly adding a **params argument to the ordinary initializer will not work, - because get_params explicitly looks only at the initializer signature arguments that are not - varargs or varkeywords, so we need to modify the signature of the initializer to include the - "normalize" argument. - """ - # if we're decorating a class, just update the __init__ method, - # so that the result is still a class instead of a wrapper method - if isinstance(to_wrap, type): - import sklearn - from packaging import version - - if version.parse(sklearn.__version__) >= version.parse("1.0"): - # normalize was deprecated or removed; don't need to do anything - return to_wrap - - else: - from inspect import Parameter, signature - from functools import wraps - - old_init = to_wrap.__init__ - - @wraps(old_init) - def new_init(self, *args, normalize=False, **kwargs): - if normalize is not False: - warnings.warn("normalize is deprecated and will be ignored", stacklevel=2) - return old_init(self, *args, **kwargs) - - sig = signature(old_init) - sig = sig.replace(parameters=[*sig.parameters.values(), - Parameter("normalize", kind=Parameter.KEYWORD_ONLY, default=False)]) - - new_init.__signature__ = sig - to_wrap.__init__ = new_init - return to_wrap - else: - raise ValueError("This decorator was applied to a method, but is intended to be applied only to types.") - - def _weighted_check_cv(cv=5, y=None, classifier=False, random_state=None): cv = 5 if cv is None else cv if isinstance(cv, numbers.Integral): @@ -176,7 +131,6 @@ def _fit_weighted_linear_model(self, X, y, sample_weight, check_input=None): super().fit(**fit_params) -@_add_normalize class WeightedLasso(WeightedModelMixin, Lasso): """Version of sklearn Lasso that accepts weights. @@ -282,7 +236,6 @@ def fit(self, X, y, sample_weight=None, check_input=True): return self -@_add_normalize class WeightedMultiTaskLasso(WeightedModelMixin, MultiTaskLasso): """Version of sklearn MultiTaskLasso that accepts weights. 
@@ -372,7 +325,6 @@ def fit(self, X, y, sample_weight=None): return self -@_add_normalize class WeightedLassoCV(WeightedModelMixin, LassoCV): """Version of sklearn LassoCV that accepts weights. @@ -491,7 +443,6 @@ def fit(self, X, y, sample_weight=None): return self -@_add_normalize class WeightedMultiTaskLassoCV(WeightedModelMixin, MultiTaskLassoCV): """Version of sklearn MultiTaskLassoCV that accepts weights. @@ -631,7 +582,6 @@ def _get_theta_coefs_and_tau_sq(i, X, sample_weight, alpha_cov, n_alphas_cov, ma return coefs, tausq -@_add_normalize class DebiasedLasso(WeightedLasso): """Debiased Lasso model. @@ -977,7 +927,6 @@ def _get_unscaled_coef_var(self, X, theta_hat, sample_weight): return _unscaled_coef_var -@_add_normalize class MultiOutputDebiasedLasso(MultiOutputRegressor): """Debiased MultiOutputLasso model. diff --git a/setup.cfg b/setup.cfg index 70529d9a0..067333d5a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,7 +34,7 @@ packages = find_namespace: install_requires = numpy scipy > 1.4.0 - scikit-learn > 0.22.0, < 1.3 + scikit-learn >= 1.0, < 1.3 sparse joblib >= 0.13.0 statsmodels >= 0.10 From f98847a29005cafdbc321e7f7bc49740f052437a Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Fri, 5 May 2023 17:46:47 -0400 Subject: [PATCH 8/9] Cancel stale PR workflow runs Signed-off-by: Keith Battocchi --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 413635a8f..a3ca5d22e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,6 +12,11 @@ on: default: '' type: string +# Only run once per PR, canceling any previous runs +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + # Precompute the ref if the workflow was triggered by a workflow dispatch rather than copying this logic repeatedly env: ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }} From 
e8a7df981391345671f64bd539166ab27bc228fa Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Mon, 8 May 2023 13:58:47 -0400 Subject: [PATCH 9/9] Check notebook test impact Signed-off-by: Keith Battocchi --- .github/workflows/ci.yml | 117 ++------------------------------------- 1 file changed, 4 insertions(+), 113 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a3ca5d22e..b2b1e115e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,24 +77,6 @@ jobs: buildNbs: ${{ steps.eval.outputs.buildNbs }} testCode: ${{ steps.eval.outputs.testCode }} - lint: - name: Lint code - needs: [eval] - if: ${{ needs.eval.outputs.testCode == 'True' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - name: Checkout repository - with: - ref: ${{ env.ref }} - - uses: actions/setup-python@v4 - name: Setup Python - with: - python-version: 3.9 - - run: python -m pip install --upgrade pip && pip install --upgrade setuptools - name: Ensure latest pip and setuptools - - run: 'pip install pycodestyle && pycodestyle econml' - notebooks: name: Run notebooks needs: [eval] @@ -103,6 +85,8 @@ jobs: strategy: matrix: kind: [except customer scenarios, customer scenarios] + nbconvert: ['nbconvert', '"nbconvert<7.4"'] + ipykernel: ['ipykernel', '"ipykernel<6.23"'] include: - kind: "except customer scenarios" extras: "[tf,plt]" @@ -132,7 +116,7 @@ jobs: # Add verbose flag to pip installation if in debug mode - run: pip install -e .${{ matrix.extras }} ${{ fromJSON('["","-v"]')[runner.debug] }} name: Install econml - - run: pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm + - run: pip install pytest pytest-runner jupyter jupyter-client ${{ matrix.nbconvert }} ${{ matrix.ipykernel }} nbformat seaborn xgboost tqdm name: Install test and notebook requirements - run: pip list name: List installed packages @@ -141,97 +125,4 @@ jobs: env: PYTEST_ADDOPTS: '-m "notebook"' NOTEBOOK_DIR_PATTERN: 
${{ matrix.pattern }} - - tests: - name: "Run tests" - needs: [eval] - if: ${{ needs.eval.outputs.testCode == 'True' }} - strategy: - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - python-version: [3.6, 3.7, 3.8, 3.9] - kind: [serial, other, dml, main, treatment] - exclude: - # Serial tests fail randomly on mac sometimes, so we don't run them there - - os: macos-latest - kind: serial - # Python 3.6 isn't supported on ubuntu-latest - - os: ubuntu-latest - python-version: 3.6 - - # Assign the correct package and testing options for each kind of test - include: - - kind: serial - opts: '-m "serial" -n 1' - extras: "[tf,plt]" - - kind: other - opts: '-m "cate_api" -n auto' - extras: "[tf,plt]" - - kind: dml - opts: '-m "dml"' - extras: "[tf,plt]" - - kind: main - opts: '-m "not (notebook or automl or dml or serial or cate_api or treatment_featurization)" -n 2' - extras: "[tf,plt,dowhy]" - - kind: treatment - opts: '-m "treatment_featurization" -n auto' - extras: "[tf,plt]" - fail-fast: false - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - name: Checkout repository - with: - ref: ${{ env.ref }} - - uses: actions/setup-python@v4 - name: Setup Python - with: - python-version: ${{ matrix.python-version }} - - run: python -m pip install --upgrade pip && pip install --upgrade setuptools - name: Ensure latest pip and setuptools - # Add verbose flag to pip installation if in debug mode - - run: pip install -e .${{ matrix.extras }} ${{ fromJSON('["","-v"]')[runner.debug] }} - name: Install econml - - run: pip install pytest pytest-runner coverage - name: Install pytest - - run: python setup.py pytest - name: Run tests - env: - PYTEST_ADDOPTS: ${{ matrix.opts }} - COVERAGE_PROCESS_START: 'setup.cfg' - # todo: publish test results, coverage info - - build: - name: Build package - needs: [eval] - if: ${{ needs.eval.outputs.testCode == 'True' }} - uses: ./.github/workflows/publish-package.yml - with: - publish: false - repository: testpypi - # 
don't have access to env context here for some reason - ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }} - - docs: - name: Build documentation - needs: [eval] - if: ${{ needs.eval.outputs.buildDocs == 'True' }} - uses: ./.github/workflows/publish-documentation.yml - with: - publish: false - environment: test - # don't have access to env context here for some reason - ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }} - - verify: - name: Verify CI checks - needs: [lint, notebooks, tests, build, docs] - if: always() - runs-on: ubuntu-latest - steps: - - run: exit 1 - name: At least one check failed or was cancelled - if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }} - - run: exit 0 - name: All checks passed - if: ${{ !(contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) }} + \ No newline at end of file