diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index abf988d..1c3ec0b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: strategy: fail-fast: false matrix: - PYTHON_VERSION: ["3.9", "3.10", "3.11", "3.12"] + PYTHON_VERSION: ["3.10", "3.11", "3.12"] steps: - name: Checkout branch uses: actions/checkout@v4 diff --git a/conda.recipe/recipe.yaml b/conda.recipe/recipe.yaml index 3010edf..6a354e1 100644 --- a/conda.recipe/recipe.yaml +++ b/conda.recipe/recipe.yaml @@ -19,11 +19,11 @@ build: requirements: host: - - python >=3.9 + - python >=3.10 - pip - setuptools-scm run: - - python >=3.9 + - python >=3.10 - scikit-learn >=1.3 - pandas - numpy diff --git a/environment.yml b/environment.yml index 5ddc8e8..e8af3b3 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - git_root ## Python - pip - - python>=3.9 + - python>=3.10 - setuptools-scm - mypy=1.9.0 - setuptools>=61 # Adds support for pyproject.toml package declaration. diff --git a/metalearners/_utils.py b/metalearners/_utils.py index 930ad82..7e1ab54 100644 --- a/metalearners/_utils.py +++ b/metalearners/_utils.py @@ -3,14 +3,15 @@ import operator from operator import le, lt -from typing import Optional, Protocol, Union +from typing import Protocol, Union import numpy as np import pandas as pd from sklearn.base import check_array, check_X_y -Vector = Union[pd.Series, np.ndarray] -Matrix = Union[pd.DataFrame, np.ndarray] +# ruff is not happy about the usage of Union. +Vector = Union[pd.Series, np.ndarray] # noqa +Matrix = Union[pd.DataFrame, np.ndarray] # noqa default_rng = np.random.default_rng() @@ -38,7 +39,7 @@ def index_matrix(matrix: Matrix, rows: Vector) -> Matrix: def validate_number_positive( - value: Union[int, float], name: str, strict: bool = False + value: int | float, name: str, strict: bool = False ) -> None: if strict: comparison = operator.lt @@ -50,10 +51,10 @@ def validate_number_positive( def check_propensity_score( propensity_scores: Matrix, - features: Optional[Matrix] = None, + features: Matrix | None = None, n_variants: int = 2, sum_to_one: bool = False, - check_kwargs: Optional[dict] = None, + check_kwargs: dict | None = None, sum_tolerance: float = 0.001, ) -> None: """Ensure propensity scores match assumptions. @@ -100,7 +101,7 @@ def check_propensity_score( def convert_and_pad_propensity_score( - propensity_scores: Union[Vector, Matrix], n_variants: int + propensity_scores: Vector | Matrix, n_variants: int ) -> np.ndarray: """Convert to ``np.ndarray`` and pad propensity scores, if necessary. diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py index 1e20a36..e3bd0df 100644 --- a/metalearners/cross_fit_estimator.py +++ b/metalearners/cross_fit_estimator.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: LicenseRef-QuantCo from dataclasses import dataclass, field -from typing import Literal, Optional, Union +from typing import Literal import numpy as np from sklearn.base import is_classifier @@ -26,7 +26,7 @@ PredictMethod = Literal["predict", "predict_proba"] -def _validate_oos_method(oos_method: Optional[OosMethod], enable_overall: bool) -> None: +def _validate_oos_method(oos_method: OosMethod | None, enable_overall: bool) -> None: if oos_method not in _OOS_WHITELIST: raise ValueError( f"oos_method {oos_method} not supported. Supported values are " @@ -69,23 +69,23 @@ class CrossFitEstimator: estimator_factory: type[_ScikitModel] estimator_params: dict = field(default_factory=dict) enable_overall: bool = True - random_state: Optional[int] = None + random_state: int | None = None _estimators: list[_ScikitModel] = field(init=False) _estimator_type: str = field(init=False) - _overall_estimator: Optional[_ScikitModel] = field(init=False) - _test_indices: Optional[tuple[np.ndarray]] = field(init=False) - _n_classes: Optional[int] = field(init=False) + _overall_estimator: _ScikitModel | None = field(init=False) + _test_indices: tuple[np.ndarray] | None = field(init=False) + _n_classes: int | None = field(init=False) def __post_init__(self): _validate_n_folds(self.n_folds) self._estimators: list[_ScikitModel] = [] self._estimator_type: str = self.estimator_factory._estimator_type - self._overall_estimator: Optional[_ScikitModel] = None - self._test_indices: Optional[tuple[np.ndarray]] = None - self._n_classes: Optional[int] = None + self._overall_estimator: _ScikitModel | None = None + self._test_indices: tuple[np.ndarray] | None = None + self._n_classes: int | None = None def _train_overall_estimator( - self, X: Matrix, y: Union[Matrix, Vector], fit_params: Optional[dict] = None + self, X: Matrix, y: Matrix | Vector, fit_params: dict | None = None ) -> _ScikitModel: fit_params = fit_params or dict() model = self.estimator_factory(**self.estimator_params) @@ -98,8 +98,8 @@ def _is_classification(self) -> bool: def fit( self, X: Matrix, - y: Union[Vector, Matrix], - fit_params: Optional[dict] = None, + y: Vector | Matrix, + fit_params: dict | None = None, **kwargs, ) -> Self: """Fit the underlying estimators. @@ -207,8 +207,8 @@ def _predict( X: Matrix, is_oos: bool, method: PredictMethod, - oos_method: Optional[OosMethod] = None, - w: Optional[Union[Vector, Matrix]] = None, + oos_method: OosMethod | None = None, + w: Vector | Matrix | None = None, ) -> np.ndarray: if is_oos: _validate_oos_method(oos_method, self.enable_overall) @@ -233,7 +233,7 @@ def predict( self, X: Matrix, is_oos: bool, - oos_method: Optional[OosMethod] = None, + oos_method: OosMethod | None = None, **kwargs, ) -> np.ndarray: """Predict from ``X``. @@ -255,7 +255,7 @@ def predict_proba( self, X: Matrix, is_oos: bool, - oos_method: Optional[OosMethod] = None, + oos_method: OosMethod | None = None, ) -> np.ndarray: """Predict probability from ``X``. diff --git a/metalearners/data_generation.py b/metalearners/data_generation.py index e259692..39dbdd7 100644 --- a/metalearners/data_generation.py +++ b/metalearners/data_generation.py @@ -1,7 +1,7 @@ # Copyright (c) QuantCo 2024-2024 # SPDX-License-Identifier: LicenseRef-QuantCo -from typing import Literal, Optional, Union +from typing import Literal import numpy as np import pandas as pd @@ -24,11 +24,11 @@ def generate_categoricals( n_obs: int, n_features: int, - n_categories: Optional[Union[int, np.ndarray]] = None, - n_uniform: Optional[int] = None, + n_categories: int | np.ndarray | None = None, + n_uniform: int | None = None, p_binomial: float = 0.5, use_strings: bool = False, - rng: Optional[np.random.Generator] = None, + rng: np.random.Generator | None = None, ) -> tuple[np.ndarray, np.ndarray]: r"""Generate a dataset of categorical features. @@ -87,9 +87,9 @@ def generate_categoricals( def generate_numericals( n_obs: int, n_features: int, - mu: Optional[Union[float, np.ndarray]] = None, + mu: float | np.ndarray | None = None, wishart_scale: float = 1, - rng: Optional[np.random.Generator] = None, + rng: np.random.Generator | None = None, ) -> np.ndarray: r"""Generate a dataset of numerical features. @@ -130,13 +130,13 @@ def generate_covariates( n_features: int, n_categoricals: int = 0, format: Literal["pandas", "numpy"] = "pandas", - mu: Optional[Union[float, np.ndarray]] = None, + mu: float | np.ndarray | None = None, wishart_scale: float = 1, - n_categories: Optional[Union[int, np.ndarray]] = None, - n_uniform: Optional[int] = None, + n_categories: int | np.ndarray | None = None, + n_uniform: int | None = None, p_binomial: float = 0.5, use_strings: bool = False, - rng: Optional[np.random.Generator] = None, + rng: np.random.Generator | None = None, ) -> tuple[Matrix, list[int], np.ndarray]: r"""Generates a dataset of covariates with both numerical and categorical features. @@ -225,7 +225,7 @@ def generate_covariates( def insert_missing( X: Matrix, missing_probability: float = 0.1, - rng: Optional[np.random.Generator] = None, + rng: np.random.Generator | None = None, ) -> Matrix: """Inserts missing values into the dataset. @@ -246,7 +246,7 @@ def insert_missing( def generate_treatment( - propensity_scores: np.ndarray, rng: Optional[np.random.Generator] = None + propensity_scores: np.ndarray, rng: np.random.Generator | None = None ) -> np.ndarray: """Generates a treatment assignment based on the provided propensity scores. @@ -283,11 +283,11 @@ def compute_experiment_outputs( treatment: Vector, sigma_y: float = 1, sigma_tau: float = 0.5, - n_variants: Optional[int] = None, + n_variants: int | None = None, is_classification: bool = False, positive_proportion: float = 0.5, return_probability_cate: bool = False, - rng: Optional[np.random.Generator] = None, + rng: np.random.Generator | None = None, ) -> tuple[np.ndarray, np.ndarray]: r"""Compute the experiment's observed outcomes y and the true CATE. diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py index d060606..38b36aa 100644 --- a/metalearners/metalearner.py +++ b/metalearners/metalearner.py @@ -3,7 +3,6 @@ from abc import ABC, abstractmethod from collections.abc import Collection -from typing import Optional, Union import numpy as np from typing_extensions import Self @@ -16,9 +15,9 @@ PredictMethod, ) -Params = dict[str, Union[int, float, str]] -Features = Union[Collection[str], Collection[int]] -ModelFactory = Union[type[_ScikitModel], dict[str, type[_ScikitModel]]] +Params = dict[str, int | float | str] +Features = Collection[str] | Collection[int] +ModelFactory = type[_ScikitModel] | dict[str, type[_ScikitModel]] def _initialize_model_dict(argument, expected_names: Collection[str]) -> dict: @@ -56,15 +55,15 @@ def __init__( self, nuisance_model_factory: ModelFactory, is_classification: bool, - treatment_model_factory: Optional[ModelFactory] = None, + treatment_model_factory: ModelFactory | None = None, # TODO: Consider whether we can make this not a state of the MetaLearner # but rather just a parameter of a predict call. - nuisance_model_params: Optional[Union[Params, dict[str, Params]]] = None, - treatment_model_params: Optional[Union[Params, dict[str, Params]]] = None, - feature_set: Optional[Union[Features, dict[str, Features]]] = None, + nuisance_model_params: Params | dict[str, Params] | None = None, + treatment_model_params: Params | dict[str, Params] | None = None, + feature_set: Features | dict[str, Features] | None = None, # TODO: Consider implementing selection of number of folds for various estimators. n_folds: int = 10, - random_state: Optional[int] = None, + random_state: int | None = None, ): """Initialize a MetaLearner. @@ -180,7 +179,7 @@ def dimension(n_obs, model_kind, predict_method): } def fit_nuisance( - self, X: Matrix, y: Vector, model_kind: str, fit_params: Optional[dict] = None + self, X: Matrix, y: Vector, model_kind: str, fit_params: dict | None = None ) -> Self: """Fit a given nuisance model of a MetaLearner. @@ -191,7 +190,7 @@ def fit_nuisance( return self def fit_treatment( - self, X: Matrix, y: Vector, model_kind: str, fit_params: Optional[dict] = None + self, X: Matrix, y: Vector, model_kind: str, fit_params: dict | None = None ) -> Self: """Fit the tratment model of a MetaLearner. @@ -261,7 +260,7 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, Union[float, int]]: + ) -> dict[str, float | int]: """Evaluate all models contained in a MetaLearner.""" ... diff --git a/metalearners/outcome_functions.py b/metalearners/outcome_functions.py index 7340379..c393ca2 100644 --- a/metalearners/outcome_functions.py +++ b/metalearners/outcome_functions.py @@ -1,7 +1,7 @@ # Copyright (c) QuantCo 2024-2024 # SPDX-License-Identifier: LicenseRef-QuantCo -from typing import Callable, Optional, Union +from collections.abc import Callable import numpy as np import pandas as pd @@ -10,17 +10,17 @@ def _beta( - ulow: float, uhigh: float, dim: Union[tuple, int], rng: np.random.Generator + ulow: float, uhigh: float, dim: tuple | int, rng: np.random.Generator ) -> np.ndarray: return rng.uniform(low=ulow, high=uhigh, size=dim) def constant_treatment_effect( dim: int, - tau: Union[float, np.ndarray], + tau: float | np.ndarray, ulow: float = 0, uhigh: float = 1, - rng: Optional[np.random.Generator] = None, + rng: np.random.Generator | None = None, ) -> Callable: r"""Generate a potential outcomes function with constant treatment effect. @@ -43,7 +43,7 @@ def constant_treatment_effect( rng = default_rng beta = _beta(ulow, uhigh, dim, rng) - if isinstance(tau, (int, float)): + if isinstance(tau, int | float): tau = np.array([tau]) tau = tau.reshape(1, -1) @@ -65,7 +65,7 @@ def no_treatment_effect( n_variants: int = 2, ulow: float = 0, uhigh: float = 1, - rng: Optional[np.random.Generator] = None, + rng: np.random.Generator | None = None, ) -> Callable: r"""Generate a potential outcomes function with no treatment effect. @@ -93,7 +93,7 @@ def linear_treatment_effect( n_variants: int = 2, ulow: float = 0, uhigh: float = 1, - rng: Optional[np.random.Generator] = None, + rng: np.random.Generator | None = None, ) -> Callable: r"""Generate a potential outcomes function with linear treatment effect. diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py index 4d7f66c..0cde441 100644 --- a/metalearners/tlearner.py +++ b/metalearners/tlearner.py @@ -1,7 +1,6 @@ # Copyright (c) QuantCo 2024-2024 # SPDX-License-Identifier: LicenseRef-QuantCo -from typing import Union import numpy as np from sklearn.metrics import log_loss, root_mean_squared_error @@ -156,7 +155,7 @@ def evaluate( w: Vector, is_oos: bool, oos_method: OosMethod = OVERALL, - ) -> dict[str, Union[float, int]]: + ) -> dict[str, float | int]: """Evaluate all models contained in the T-Learner.""" # TODO: Parametrize evaluation approaches. conditional_average_outcomes = self.predict_conditional_average_outcomes( diff --git a/pyproject.toml b/pyproject.toml index e1a7057..e78faef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,12 +13,11 @@ authors = [ ] classifiers = [ "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] -requires-python = ">=3.9" +requires-python = ">=3.10" [project.urls] repository = "https://github.com/quantco/metalearners" @@ -66,7 +65,7 @@ select = [ ] [tool.mypy] -python_version = '3.9' +python_version = '3.10' ignore_missing_imports = true no_implicit_optional = true check_untyped_defs = true diff --git a/tests/test_tlearner.py b/tests/test_tlearner.py index e211697..d5e5631 100644 --- a/tests/test_tlearner.py +++ b/tests/test_tlearner.py @@ -1,7 +1,6 @@ # Copyright (c) QuantCo 2024-2024 # SPDX-License-Identifier: LicenseRef-QuantCo -from typing import Union import numpy as np import pytest @@ -31,7 +30,7 @@ def _linear_base_learner(is_classification: bool): def _linear_base_learner_params( is_classification: bool, -) -> dict[str, Union[int, float, str]]: +) -> dict[str, int | float | str]: if is_classification: # Using the default value for max_iter sometimes # didn't lead to convergence.