Merge pull request #40 from msamsami/add-support-new-numpy-sklearn

maint: add support for python 3.13, numpy 2, and scikit-learn 1.6
msamsami · Dec 25, 2024 · 9455be8 · 9455be8
2 parents c84d36b + b897d75
commit 9455be8
Show file tree

Hide file tree

Showing 13 changed files with 77 additions and 38 deletions.
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
 
     steps:
     - uses: actions/checkout@v3

diff --git a/README.md b/README.md
@@ -7,9 +7,9 @@
 
 <div align="center">
 
-![Lastest Release](https://img.shields.io/badge/release-v0.3.1-green)
+![Latest Release](https://img.shields.io/badge/release-v0.4.0-green)
 [![PyPI Version](https://img.shields.io/pypi/v/wnb)](https://pypi.org/project/wnb/)
-![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)<br>
+![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)<br>
 ![GitHub Workflow Status (build)](https://github.com/msamsami/wnb/actions/workflows/build.yml/badge.svg)
 ![PyPI License](https://img.shields.io/pypi/l/wnb)
 [![PyPi Downloads](https://static.pepy.tech/badge/wnb)](https://pepy.tech/project/wnb)

diff --git a/pyproject.toml b/pyproject.toml
@@ -17,7 +17,6 @@ keywords = [
     "bayes",
     "naive bayes",
     "classifier",
-    "probabilistic",
 ]
 classifiers = [
     "Intended Audience :: Science/Research",
@@ -31,12 +30,12 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
     "License :: OSI Approved :: BSD License",
 ]
-requires-python = ">=3.8,<3.13"
+requires-python = ">=3.8,<3.14"
 dependencies = [
     "pandas>=1.4.1",
-    "numpy<2.0.0",
     "scipy>=1.8.0",
     "scikit-learn>=1.0.2",
     "typing-extensions>=4.8.0; python_full_version < '3.11'",
@@ -49,9 +48,9 @@ Source = "https://github.com/msamsami/wnb"
 [project.optional-dependencies]
 dev = [
     "pytest>=7.0.0",
-    "black==24.8.0",
+    "black>=24.8.0",
     "tqdm",
-    "pre-commit",
+    "pre-commit>=3.5.0",
     "isort",
 ]
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,4 @@
 pandas>=1.4.1
-numpy<2.0.0
 scipy>=1.8.0
 scikit-learn>=1.0.2
 typing-extensions>=4.8.0; python_version < "3.11"
diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -1,5 +1,5 @@
 pytest>=7.0.0
-black==24.8.0
+black>=24.8.0
 tqdm
-pre-commit
+pre-commit>=3.5.0
 isort
diff --git a/setup.py b/setup.py
@@ -9,7 +9,7 @@
     name="wnb",
     version=__version__,
     description="Python library for the implementations of general and weighted naive Bayes (WNB) classifiers.",
-    keywords=["python", "machine learning", "bayes", "naive bayes", "classifier", "probabilistic"],
+    keywords=["python", "machine learning", "bayes", "naive bayes", "classifier"],
     author="Mehdi Samsami",
     author_email="[email protected]",
     license="BSD License",
@@ -32,22 +32,22 @@
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
         "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
         "License :: OSI Approved :: BSD License",
     ],
-    python_requires=">=3.8,<3.13",
+    python_requires=">=3.8,<3.14",
     install_requires=[
         "pandas>=1.4.1",
-        "numpy<2.0.0",
         "scipy>=1.8.0",
         "scikit-learn>=1.0.2",
         "typing-extensions>=4.8.0; python_full_version < '3.11'",
     ],
     extras_require={
         "dev": [
             "pytest>=7.0.0",
-            "black==24.8.0",
+            "black>=24.8.0",
             "tqdm",
-            "pre-commit",
+            "pre-commit>=3.5.0",
             "isort",
         ]
     },

diff --git a/tests/test_gwnb.py b/tests/test_gwnb.py
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 import pytest
 from sklearn.base import is_classifier
@@ -131,8 +133,8 @@ def test_gwnb_non_binary():
     y_ = np.array([1, 2, 3, 4, 4, 3, 2, 1, 1, 2])
     clf = GaussianWNB()
 
-    msg = "Unknown label type: non-binary"
-    with pytest.raises(ValueError, match=msg):
+    pattern = re.compile(r"(Only binary classification is supported|Unknown label type: non-binary)")
+    with pytest.raises(ValueError, match=pattern):
         clf.fit(X_, y_)
 
 

diff --git a/wnb/__init__.py b/wnb/__init__.py
@@ -2,7 +2,7 @@
 Python library for the implementations of general and weighted naive Bayes (WNB) classifiers.
 """
 
-__version__ = "0.3.1"
+__version__ = "0.4.0"
 __author__ = "Mehdi Samsami"
 
 

diff --git a/wnb/_utils.py b/wnb/_utils.py
@@ -0,0 +1,19 @@
+"""Compatibility shims that hide scikit-learn API differences between versions."""
+
+from typing import Any
+
+import sklearn
+from sklearn.utils import check_array
+from sklearn.utils.fixes import parse_version
+
+__all__ = ["SKLEARN_V1_6_OR_LATER", "validate_data"]
+
+
+# scikit-learn's own version parser is used so that importing this module does
+# not depend on ``packaging``, which is not among the declared dependencies.
+SKLEARN_V1_6_OR_LATER = parse_version(sklearn.__version__) >= parse_version("1.6")
+
+
+if SKLEARN_V1_6_OR_LATER:
+    from sklearn.utils.validation import validate_data as _sklearn_validate_data
+
+    def validate_data(estimator, X, **kwargs: Any):
+        """Validate ``X`` through scikit-learn's own ``validate_data``.
+
+        A legacy ``force_all_finite`` keyword is forwarded as
+        ``ensure_all_finite`` (unless the latter is also given), because
+        scikit-learn 1.6 deprecated the old spelling and warns when it is used.
+        All other keyword arguments are passed through unchanged.
+        """
+        if "force_all_finite" in kwargs:
+            legacy_value = kwargs.pop("force_all_finite")
+            kwargs.setdefault("ensure_all_finite", legacy_value)
+        return _sklearn_validate_data(estimator, X, **kwargs)
+
+else:
+
+    def validate_data(estimator, X, **kwargs: Any):
+        """Validate ``X`` with ``check_array`` on scikit-learn older than 1.6.
+
+        ``reset`` is dropped because ``check_array`` has no such parameter, and
+        ``ensure_all_finite`` is forwarded under its pre-1.6 name
+        ``force_all_finite`` (unless the latter is also given).
+        """
+        kwargs.pop("reset", None)
+        if "ensure_all_finite" in kwargs:
+            modern_value = kwargs.pop("ensure_all_finite")
+            kwargs.setdefault("force_all_finite", modern_value)
+        return check_array(X, estimator=estimator, **kwargs)
diff --git a/wnb/gnb.py b/wnb/gnb.py
@@ -10,7 +10,7 @@
 from scipy.special import logsumexp
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.exceptions import DataConversionWarning
-from sklearn.utils import as_float_array, check_array
+from sklearn.utils import as_float_array
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.utils.validation import check_is_fitted
 
@@ -20,10 +20,11 @@
     from typing_extensions import Self
 
 from wnb.stats import Distribution, NonNumericDistributions
+from wnb.stats._utils import get_dist_class, is_dist_supported
 from wnb.stats.base import DistMixin
 from wnb.stats.typing import DistributionLike
-from wnb.stats.utils import get_dist_class, is_dist_supported
 
+from ._utils import SKLEARN_V1_6_OR_LATER, validate_data
 from .typing import ArrayLike, Float, MatrixLike
 
 __all__ = ["GeneralNB"]
@@ -83,6 +84,13 @@ def __init__(
         self.distributions = distributions
         self.alpha = alpha
 
+    if SKLEARN_V1_6_OR_LATER:
+
+        def __sklearn_tags__(self):
+            """Return the inherited estimator tags with the target flagged as required."""
+            estimator_tags = super().__sklearn_tags__()
+            # Only the "target required" flag is changed; every other tag is inherited as-is.
+            estimator_tags.target_tags.required = True
+            return estimator_tags
+
     def _more_tags(self) -> dict[str, bool]:
         return {"requires_y": True}
 
@@ -101,8 +109,9 @@ def _check_inputs(self, X, y) -> None:
         if self.n_classes_ == 1:
             raise ValueError("Classifier can't train when only one class is present")
 
-        X = check_array(
-            array=X,
+        X = validate_data(
+            self,
+            X,
             accept_sparse=False,
             accept_large_sparse=False,
             dtype=(
@@ -112,7 +121,6 @@ def _check_inputs(self, X, y) -> None:
             ensure_2d=True,
             ensure_min_samples=1,
             ensure_min_features=1,
-            estimator=self,
         )
 
         # Check if X contains complex values
@@ -282,14 +290,15 @@ def predict_log_proba(self, X: MatrixLike) -> np.ndarray:
         check_is_fitted(self)
 
         # Input validation
-        X = check_array(
-            array=X,
+        X = validate_data(
+            self,
+            X,
             accept_large_sparse=False,
             force_all_finite=True,
             dtype=(
                 None if any(d in self._get_distributions() for d in NonNumericDistributions) else "numeric"
             ),
-            estimator=self,
+            reset=False,
         )
 
         # Check if the number of input features matches the data seen during fit

diff --git a/wnb/gwnb.py b/wnb/gwnb.py
@@ -12,7 +12,7 @@
 from scipy.stats import norm
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.exceptions import DataConversionWarning
-from sklearn.utils import as_float_array, check_array, deprecated
+from sklearn.utils import as_float_array, deprecated
 from sklearn.utils.multiclass import check_classification_targets, type_of_target
 from sklearn.utils.validation import check_is_fitted
 
@@ -21,6 +21,7 @@
 else:
     from typing_extensions import Self
 
+from ._utils import SKLEARN_V1_6_OR_LATER, validate_data
 from .typing import ArrayLike, Float, Int, MatrixLike
 
 __all__ = ["GaussianWNB"]
@@ -111,6 +112,14 @@ def __init__(
         self.C = C
         self.learning_hist = learning_hist
 
+    if SKLEARN_V1_6_OR_LATER:
+
+        def __sklearn_tags__(self):
+            """Return the inherited estimator tags, adjusted for this classifier.
+
+            The target is flagged as required and multi-class support is
+            switched off; every other tag is inherited as-is.
+            """
+            estimator_tags = super().__sklearn_tags__()
+            estimator_tags.classifier_tags.multi_class = False
+            estimator_tags.target_tags.required = True
+            return estimator_tags
+
     def _more_tags(self) -> dict[str, bool]:
         return {"binary_only": True, "requires_y": True}
 
@@ -119,24 +128,27 @@ def _check_inputs(self, X, y) -> None:
         check_classification_targets(y)
 
         # Check that the dataset has only two unique labels
-        if type_of_target(y) != "binary":
-            warnings.warn("This version of MLD-WNB only supports binary classification.")
-            raise ValueError("Unknown label type: non-binary")
+        if (y_type := type_of_target(y)) != "binary":
+            if SKLEARN_V1_6_OR_LATER:
+                msg = f"Only binary classification is supported. The type of the target is {y_type}."
+            else:
+                msg = "Unknown label type: non-binary"
+            raise ValueError(msg)
 
         # Check if only one class is present in label vector
         if self.n_classes_ == 1:
             raise ValueError("Classifier can't train when only one class is present.")
 
-        X = check_array(
-            array=X,
+        X = validate_data(
+            self,
+            X,
             accept_sparse=False,
             accept_large_sparse=False,
             dtype="numeric",
             force_all_finite=True,
             ensure_2d=True,
             ensure_min_samples=1,
             ensure_min_features=1,
-            estimator=self,
         )
 
         # Check if X contains complex values
@@ -416,7 +428,7 @@ def predict_log_proba(self, X: MatrixLike) -> np.ndarray:
         check_is_fitted(self)
 
         # Input validation
-        X = check_array(array=X, accept_large_sparse=False, force_all_finite=True, estimator=self)
+        X = validate_data(self, X, accept_large_sparse=False, force_all_finite=True, reset=False)
 
         # Check if the number of input features matches the data seen during fit
         if X.shape[1] != self.n_features_in_:

diff --git a/wnb/stats/utils.py → wnb/stats/_utils.py b/wnb/stats/utils.py → wnb/stats/_utils.py
@@ -6,8 +6,6 @@
 from .enums import Distribution
 from .typing import DistributionLike
 
-__all__ = ["is_dist_supported", "get_dist_class"]
-
 
 def is_dist_supported(dist: DistributionLike) -> bool:
     with contextlib.suppress(TypeError):

diff --git a/wnb/stats/discrete.py b/wnb/stats/discrete.py
@@ -1,3 +1,4 @@
+from math import factorial
 from typing import Any, Mapping
 
 import numpy as np
@@ -82,7 +83,7 @@ def from_data(cls, data, **kwargs: Any) -> "PoissonDist":
 
     def pmf(self, x: int) -> float:
+        """Return the probability mass at ``x`` for the rate stored on the instance.
+
+        Values below the lower bound of the support, or with a fractional
+        part, get probability 0.0.
+        """
+        # ``math.factorial`` rejects floats on Python >= 3.10, so an integral
+        # float such as 3.0 is converted explicitly; the guard in the
+        # conditional guarantees that ``int(x)`` is lossless.
         return (
-            (np.exp(-self.rate) * self.rate**x) / np.math.factorial(x)
+            (np.exp(-self.rate) * self.rate**x) / factorial(int(x))
             if x >= self._support[0] and x - int(x) == 0
             else 0.0
         )