diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index abf988d..1c3ec0b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -35,7 +35,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        PYTHON_VERSION: ["3.9", "3.10", "3.11", "3.12"]
+        PYTHON_VERSION: ["3.10", "3.11", "3.12"]
     steps:
       - name: Checkout branch
         uses: actions/checkout@v4
diff --git a/conda.recipe/recipe.yaml b/conda.recipe/recipe.yaml
index 3010edf..6a354e1 100644
--- a/conda.recipe/recipe.yaml
+++ b/conda.recipe/recipe.yaml
@@ -19,11 +19,11 @@ build:
 
 requirements:
   host:
-    - python >=3.9
+    - python >=3.10
     - pip
     - setuptools-scm
   run:
-    - python >=3.9
+    - python >=3.10
     - scikit-learn >=1.3
     - pandas
     - numpy
diff --git a/environment.yml b/environment.yml
index 5ddc8e8..e8af3b3 100644
--- a/environment.yml
+++ b/environment.yml
@@ -9,7 +9,7 @@ dependencies:
   - git_root
   ## Python
   - pip
-  - python>=3.9
+  - python>=3.10
   - setuptools-scm
   - mypy=1.9.0
   - setuptools>=61 # Adds support for pyproject.toml package declaration.
diff --git a/metalearners/_utils.py b/metalearners/_utils.py
index 930ad82..7e1ab54 100644
--- a/metalearners/_utils.py
+++ b/metalearners/_utils.py
@@ -3,14 +3,15 @@
 
 import operator
 from operator import le, lt
-from typing import Optional, Protocol, Union
+from typing import Protocol, Union
 
 import numpy as np
 import pandas as pd
 from sklearn.base import check_array, check_X_y
 
-Vector = Union[pd.Series, np.ndarray]
-Matrix = Union[pd.DataFrame, np.ndarray]
+# ruff flags these `Union` aliases; the `noqa` markers below silence that warning.
+Vector = Union[pd.Series, np.ndarray]  # noqa
+Matrix = Union[pd.DataFrame, np.ndarray]  # noqa
 
 default_rng = np.random.default_rng()
 
@@ -38,7 +39,7 @@ def index_matrix(matrix: Matrix, rows: Vector) -> Matrix:
 
 
 def validate_number_positive(
-    value: Union[int, float], name: str, strict: bool = False
+    value: int | float, name: str, strict: bool = False
 ) -> None:
     if strict:
         comparison = operator.lt
@@ -50,10 +51,10 @@ def validate_number_positive(
 
 def check_propensity_score(
     propensity_scores: Matrix,
-    features: Optional[Matrix] = None,
+    features: Matrix | None = None,
     n_variants: int = 2,
     sum_to_one: bool = False,
-    check_kwargs: Optional[dict] = None,
+    check_kwargs: dict | None = None,
     sum_tolerance: float = 0.001,
 ) -> None:
     """Ensure propensity scores match assumptions.
@@ -100,7 +101,7 @@ def check_propensity_score(
 
 
 def convert_and_pad_propensity_score(
-    propensity_scores: Union[Vector, Matrix], n_variants: int
+    propensity_scores: Vector | Matrix, n_variants: int
 ) -> np.ndarray:
     """Convert to ``np.ndarray`` and pad propensity scores, if necessary.
 
diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py
index 1e20a36..e3bd0df 100644
--- a/metalearners/cross_fit_estimator.py
+++ b/metalearners/cross_fit_estimator.py
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: LicenseRef-QuantCo
 
 from dataclasses import dataclass, field
-from typing import Literal, Optional, Union
+from typing import Literal
 
 import numpy as np
 from sklearn.base import is_classifier
@@ -26,7 +26,7 @@
 PredictMethod = Literal["predict", "predict_proba"]
 
 
-def _validate_oos_method(oos_method: Optional[OosMethod], enable_overall: bool) -> None:
+def _validate_oos_method(oos_method: OosMethod | None, enable_overall: bool) -> None:
     if oos_method not in _OOS_WHITELIST:
         raise ValueError(
             f"oos_method {oos_method} not supported. Supported values are "
@@ -69,23 +69,23 @@ class CrossFitEstimator:
     estimator_factory: type[_ScikitModel]
     estimator_params: dict = field(default_factory=dict)
     enable_overall: bool = True
-    random_state: Optional[int] = None
+    random_state: int | None = None
     _estimators: list[_ScikitModel] = field(init=False)
     _estimator_type: str = field(init=False)
-    _overall_estimator: Optional[_ScikitModel] = field(init=False)
-    _test_indices: Optional[tuple[np.ndarray]] = field(init=False)
-    _n_classes: Optional[int] = field(init=False)
+    _overall_estimator: _ScikitModel | None = field(init=False)
+    _test_indices: tuple[np.ndarray] | None = field(init=False)
+    _n_classes: int | None = field(init=False)
 
     def __post_init__(self):
         _validate_n_folds(self.n_folds)
         self._estimators: list[_ScikitModel] = []
         self._estimator_type: str = self.estimator_factory._estimator_type
-        self._overall_estimator: Optional[_ScikitModel] = None
-        self._test_indices: Optional[tuple[np.ndarray]] = None
-        self._n_classes: Optional[int] = None
+        self._overall_estimator: _ScikitModel | None = None
+        self._test_indices: tuple[np.ndarray] | None = None
+        self._n_classes: int | None = None
 
     def _train_overall_estimator(
-        self, X: Matrix, y: Union[Matrix, Vector], fit_params: Optional[dict] = None
+        self, X: Matrix, y: Matrix | Vector, fit_params: dict | None = None
     ) -> _ScikitModel:
         fit_params = fit_params or dict()
         model = self.estimator_factory(**self.estimator_params)
@@ -98,8 +98,8 @@ def _is_classification(self) -> bool:
     def fit(
         self,
         X: Matrix,
-        y: Union[Vector, Matrix],
-        fit_params: Optional[dict] = None,
+        y: Vector | Matrix,
+        fit_params: dict | None = None,
         **kwargs,
     ) -> Self:
         """Fit the underlying estimators.
@@ -207,8 +207,8 @@ def _predict(
         X: Matrix,
         is_oos: bool,
         method: PredictMethod,
-        oos_method: Optional[OosMethod] = None,
-        w: Optional[Union[Vector, Matrix]] = None,
+        oos_method: OosMethod | None = None,
+        w: Vector | Matrix | None = None,
     ) -> np.ndarray:
         if is_oos:
             _validate_oos_method(oos_method, self.enable_overall)
@@ -233,7 +233,7 @@ def predict(
         self,
         X: Matrix,
         is_oos: bool,
-        oos_method: Optional[OosMethod] = None,
+        oos_method: OosMethod | None = None,
         **kwargs,
     ) -> np.ndarray:
         """Predict from ``X``.
@@ -255,7 +255,7 @@ def predict_proba(
         self,
         X: Matrix,
         is_oos: bool,
-        oos_method: Optional[OosMethod] = None,
+        oos_method: OosMethod | None = None,
     ) -> np.ndarray:
         """Predict probability from ``X``.
 
diff --git a/metalearners/data_generation.py b/metalearners/data_generation.py
index e259692..39dbdd7 100644
--- a/metalearners/data_generation.py
+++ b/metalearners/data_generation.py
@@ -1,7 +1,7 @@
 # Copyright (c) QuantCo 2024-2024
 # SPDX-License-Identifier: LicenseRef-QuantCo
 
-from typing import Literal, Optional, Union
+from typing import Literal
 
 import numpy as np
 import pandas as pd
@@ -24,11 +24,11 @@
 def generate_categoricals(
     n_obs: int,
     n_features: int,
-    n_categories: Optional[Union[int, np.ndarray]] = None,
-    n_uniform: Optional[int] = None,
+    n_categories: int | np.ndarray | None = None,
+    n_uniform: int | None = None,
     p_binomial: float = 0.5,
     use_strings: bool = False,
-    rng: Optional[np.random.Generator] = None,
+    rng: np.random.Generator | None = None,
 ) -> tuple[np.ndarray, np.ndarray]:
     r"""Generate a dataset of categorical features.
 
@@ -87,9 +87,9 @@ def generate_categoricals(
 def generate_numericals(
     n_obs: int,
     n_features: int,
-    mu: Optional[Union[float, np.ndarray]] = None,
+    mu: float | np.ndarray | None = None,
     wishart_scale: float = 1,
-    rng: Optional[np.random.Generator] = None,
+    rng: np.random.Generator | None = None,
 ) -> np.ndarray:
     r"""Generate a dataset of numerical features.
 
@@ -130,13 +130,13 @@ def generate_covariates(
     n_features: int,
     n_categoricals: int = 0,
     format: Literal["pandas", "numpy"] = "pandas",
-    mu: Optional[Union[float, np.ndarray]] = None,
+    mu: float | np.ndarray | None = None,
     wishart_scale: float = 1,
-    n_categories: Optional[Union[int, np.ndarray]] = None,
-    n_uniform: Optional[int] = None,
+    n_categories: int | np.ndarray | None = None,
+    n_uniform: int | None = None,
     p_binomial: float = 0.5,
     use_strings: bool = False,
-    rng: Optional[np.random.Generator] = None,
+    rng: np.random.Generator | None = None,
 ) -> tuple[Matrix, list[int], np.ndarray]:
     r"""Generates a dataset of covariates with both numerical and categorical features.
 
@@ -225,7 +225,7 @@ def generate_covariates(
 def insert_missing(
     X: Matrix,
     missing_probability: float = 0.1,
-    rng: Optional[np.random.Generator] = None,
+    rng: np.random.Generator | None = None,
 ) -> Matrix:
     """Inserts missing values into the dataset.
 
@@ -246,7 +246,7 @@ def insert_missing(
 
 
 def generate_treatment(
-    propensity_scores: np.ndarray, rng: Optional[np.random.Generator] = None
+    propensity_scores: np.ndarray, rng: np.random.Generator | None = None
 ) -> np.ndarray:
     """Generates a treatment assignment based on the provided propensity scores.
 
@@ -283,11 +283,11 @@ def compute_experiment_outputs(
     treatment: Vector,
     sigma_y: float = 1,
     sigma_tau: float = 0.5,
-    n_variants: Optional[int] = None,
+    n_variants: int | None = None,
     is_classification: bool = False,
     positive_proportion: float = 0.5,
     return_probability_cate: bool = False,
-    rng: Optional[np.random.Generator] = None,
+    rng: np.random.Generator | None = None,
 ) -> tuple[np.ndarray, np.ndarray]:
     r"""Compute the experiment's observed outcomes y and the true CATE.
 
diff --git a/metalearners/metalearner.py b/metalearners/metalearner.py
index d060606..38b36aa 100644
--- a/metalearners/metalearner.py
+++ b/metalearners/metalearner.py
@@ -3,7 +3,6 @@
 
 from abc import ABC, abstractmethod
 from collections.abc import Collection
-from typing import Optional, Union
 
 import numpy as np
 from typing_extensions import Self
@@ -16,9 +15,9 @@
     PredictMethod,
 )
 
-Params = dict[str, Union[int, float, str]]
-Features = Union[Collection[str], Collection[int]]
-ModelFactory = Union[type[_ScikitModel], dict[str, type[_ScikitModel]]]
+Params = dict[str, int | float | str]
+Features = Collection[str] | Collection[int]
+ModelFactory = type[_ScikitModel] | dict[str, type[_ScikitModel]]
 
 
 def _initialize_model_dict(argument, expected_names: Collection[str]) -> dict:
@@ -56,15 +55,15 @@ def __init__(
         self,
         nuisance_model_factory: ModelFactory,
         is_classification: bool,
-        treatment_model_factory: Optional[ModelFactory] = None,
+        treatment_model_factory: ModelFactory | None = None,
         # TODO: Consider whether we can make this not a state of the MetaLearner
         # but rather just a parameter of a predict call.
-        nuisance_model_params: Optional[Union[Params, dict[str, Params]]] = None,
-        treatment_model_params: Optional[Union[Params, dict[str, Params]]] = None,
-        feature_set: Optional[Union[Features, dict[str, Features]]] = None,
+        nuisance_model_params: Params | dict[str, Params] | None = None,
+        treatment_model_params: Params | dict[str, Params] | None = None,
+        feature_set: Features | dict[str, Features] | None = None,
         # TODO: Consider implementing selection of number of folds for various estimators.
         n_folds: int = 10,
-        random_state: Optional[int] = None,
+        random_state: int | None = None,
     ):
         """Initialize a MetaLearner.
 
@@ -180,7 +179,7 @@ def dimension(n_obs, model_kind, predict_method):
         }
 
     def fit_nuisance(
-        self, X: Matrix, y: Vector, model_kind: str, fit_params: Optional[dict] = None
+        self, X: Matrix, y: Vector, model_kind: str, fit_params: dict | None = None
     ) -> Self:
         """Fit a given nuisance model of a MetaLearner.
 
@@ -191,7 +190,7 @@ def fit_nuisance(
         return self
 
     def fit_treatment(
-        self, X: Matrix, y: Vector, model_kind: str, fit_params: Optional[dict] = None
+        self, X: Matrix, y: Vector, model_kind: str, fit_params: dict | None = None
     ) -> Self:
         """Fit the tratment model of a MetaLearner.
 
@@ -261,7 +260,7 @@ def evaluate(
         w: Vector,
         is_oos: bool,
         oos_method: OosMethod = OVERALL,
-    ) -> dict[str, Union[float, int]]:
+    ) -> dict[str, float | int]:
         """Evaluate all models contained in a MetaLearner."""
         ...
 
diff --git a/metalearners/outcome_functions.py b/metalearners/outcome_functions.py
index 7340379..c393ca2 100644
--- a/metalearners/outcome_functions.py
+++ b/metalearners/outcome_functions.py
@@ -1,7 +1,7 @@
 # Copyright (c) QuantCo 2024-2024
 # SPDX-License-Identifier: LicenseRef-QuantCo
 
-from typing import Callable, Optional, Union
+from collections.abc import Callable
 
 import numpy as np
 import pandas as pd
@@ -10,17 +10,17 @@
 
 
 def _beta(
-    ulow: float, uhigh: float, dim: Union[tuple, int], rng: np.random.Generator
+    ulow: float, uhigh: float, dim: tuple | int, rng: np.random.Generator
 ) -> np.ndarray:
     return rng.uniform(low=ulow, high=uhigh, size=dim)
 
 
 def constant_treatment_effect(
     dim: int,
-    tau: Union[float, np.ndarray],
+    tau: float | np.ndarray,
     ulow: float = 0,
     uhigh: float = 1,
-    rng: Optional[np.random.Generator] = None,
+    rng: np.random.Generator | None = None,
 ) -> Callable:
     r"""Generate a potential outcomes function with constant treatment effect.
 
@@ -43,7 +43,7 @@ def constant_treatment_effect(
         rng = default_rng
 
     beta = _beta(ulow, uhigh, dim, rng)
-    if isinstance(tau, (int, float)):
+    if isinstance(tau, int | float):
         tau = np.array([tau])
     tau = tau.reshape(1, -1)
 
@@ -65,7 +65,7 @@ def no_treatment_effect(
     n_variants: int = 2,
     ulow: float = 0,
     uhigh: float = 1,
-    rng: Optional[np.random.Generator] = None,
+    rng: np.random.Generator | None = None,
 ) -> Callable:
     r"""Generate a potential outcomes function with no treatment effect.
 
@@ -93,7 +93,7 @@ def linear_treatment_effect(
     n_variants: int = 2,
     ulow: float = 0,
     uhigh: float = 1,
-    rng: Optional[np.random.Generator] = None,
+    rng: np.random.Generator | None = None,
 ) -> Callable:
     r"""Generate a potential outcomes function with linear treatment effect.
 
diff --git a/metalearners/tlearner.py b/metalearners/tlearner.py
index 4d7f66c..0cde441 100644
--- a/metalearners/tlearner.py
+++ b/metalearners/tlearner.py
@@ -1,7 +1,6 @@
 # Copyright (c) QuantCo 2024-2024
 # SPDX-License-Identifier: LicenseRef-QuantCo
 
-from typing import Union
 
 import numpy as np
 from sklearn.metrics import log_loss, root_mean_squared_error
@@ -156,7 +155,7 @@ def evaluate(
         w: Vector,
         is_oos: bool,
         oos_method: OosMethod = OVERALL,
-    ) -> dict[str, Union[float, int]]:
+    ) -> dict[str, float | int]:
         """Evaluate all models contained in the T-Learner."""
         # TODO: Parametrize evaluation approaches.
         conditional_average_outcomes = self.predict_conditional_average_outcomes(
diff --git a/pyproject.toml b/pyproject.toml
index e1a7057..e78faef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,12 +13,11 @@ authors = [
 ]
 classifiers = [
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
 ]
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 
 [project.urls]
 repository = "https://github.com/quantco/metalearners"
@@ -66,7 +65,7 @@ select = [
 ]
 
 [tool.mypy]
-python_version = '3.9'
+python_version = '3.10'
 ignore_missing_imports = true
 no_implicit_optional = true
 check_untyped_defs = true
diff --git a/tests/test_tlearner.py b/tests/test_tlearner.py
index e211697..d5e5631 100644
--- a/tests/test_tlearner.py
+++ b/tests/test_tlearner.py
@@ -1,7 +1,6 @@
 # Copyright (c) QuantCo 2024-2024
 # SPDX-License-Identifier: LicenseRef-QuantCo
 
-from typing import Union
 
 import numpy as np
 import pytest
@@ -31,7 +30,7 @@ def _linear_base_learner(is_classification: bool):
 
 def _linear_base_learner_params(
     is_classification: bool,
-) -> dict[str, Union[int, float, str]]:
+) -> dict[str, int | float | str]:
     if is_classification:
         # Using the default value for max_iter sometimes
         # didn't lead to convergence.