Skip to content

Commit

Permalink
Merge pull request #40 from msamsami/add-support-new-numpy-sklearn
Browse files Browse the repository at this point in the history
maint: add support for python 3.13, numpy 2, and scikit-learn 1.6
  • Loading branch information
msamsami authored Dec 25, 2024
2 parents c84d36b + b897d75 commit 9455be8
Show file tree
Hide file tree
Showing 13 changed files with 77 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v3
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

<div align="center">

![Lastest Release](https://img.shields.io/badge/release-v0.3.1-green)
![Latest Release](https://img.shields.io/badge/release-v0.4.0-green)
[![PyPI Version](https://img.shields.io/pypi/v/wnb)](https://pypi.org/project/wnb/)
![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)<br>
![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)<br>
![GitHub Workflow Status (build)](https://github.com/msamsami/wnb/actions/workflows/build.yml/badge.svg)
![PyPI License](https://img.shields.io/pypi/l/wnb)
[![PyPi Downloads](https://static.pepy.tech/badge/wnb)](https://pepy.tech/project/wnb)
Expand Down
9 changes: 4 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ keywords = [
"bayes",
"naive bayes",
"classifier",
"probabilistic",
]
classifiers = [
"Intended Audience :: Science/Research",
Expand All @@ -31,12 +30,12 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"License :: OSI Approved :: BSD License",
]
requires-python = ">=3.8,<3.13"
requires-python = ">=3.8,<3.14"
dependencies = [
"pandas>=1.4.1",
"numpy<2.0.0",
"scipy>=1.8.0",
"scikit-learn>=1.0.2",
"typing-extensions>=4.8.0; python_full_version < '3.11'",
Expand All @@ -49,9 +48,9 @@ Source = "https://github.com/msamsami/wnb"
[project.optional-dependencies]
dev = [
"pytest>=7.0.0",
"black==24.8.0",
"black>=24.8.0",
"tqdm",
"pre-commit",
"pre-commit>=3.5.0",
"isort",
]

Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
pandas>=1.4.1
numpy<2.0.0
scipy>=1.8.0
scikit-learn>=1.0.2
typing-extensions>=4.8.0; python_version < "3.11"
4 changes: 2 additions & 2 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pytest>=7.0.0
black==24.8.0
black>=24.8.0
tqdm
pre-commit
pre-commit>=3.5.0
isort
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
name="wnb",
version=__version__,
description="Python library for the implementations of general and weighted naive Bayes (WNB) classifiers.",
keywords=["python", "machine learning", "bayes", "naive bayes", "classifier", "probabilistic"],
keywords=["python", "machine learning", "bayes", "naive bayes", "classifier"],
author="Mehdi Samsami",
author_email="[email protected]",
license="BSD License",
Expand All @@ -32,22 +32,22 @@
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"License :: OSI Approved :: BSD License",
],
python_requires=">=3.8,<3.13",
python_requires=">=3.8,<3.14",
install_requires=[
"pandas>=1.4.1",
"numpy<2.0.0",
"scipy>=1.8.0",
"scikit-learn>=1.0.2",
"typing-extensions>=4.8.0; python_full_version < '3.11'",
],
extras_require={
"dev": [
"pytest>=7.0.0",
"black==24.8.0",
"black>=24.8.0",
"tqdm",
"pre-commit",
"pre-commit>=3.5.0",
"isort",
]
},
Expand Down
6 changes: 4 additions & 2 deletions tests/test_gwnb.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

import numpy as np
import pytest
from sklearn.base import is_classifier
Expand Down Expand Up @@ -131,8 +133,8 @@ def test_gwnb_non_binary():
y_ = np.array([1, 2, 3, 4, 4, 3, 2, 1, 1, 2])
clf = GaussianWNB()

msg = "Unknown label type: non-binary"
with pytest.raises(ValueError, match=msg):
pattern = re.compile(r"(Only binary classification is supported|Unknown label type: non-binary)")
with pytest.raises(ValueError, match=pattern):
clf.fit(X_, y_)


Expand Down
2 changes: 1 addition & 1 deletion wnb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Python library for the implementations of general and weighted naive Bayes (WNB) classifiers.
"""

__version__ = "0.3.1"
__version__ = "0.4.0"
__author__ = "Mehdi Samsami"


Expand Down
19 changes: 19 additions & 0 deletions wnb/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Compatibility helpers that hide scikit-learn API differences across versions."""

from typing import Any

from sklearn.utils import check_array

__all__ = ["SKLEARN_V1_6_OR_LATER", "validate_data"]


# Detect the scikit-learn >= 1.6 API by probing for the public helper it
# introduced, rather than parsing ``sklearn.__version__`` with the third-party
# ``packaging`` distribution, which is not among this project's declared
# dependencies and therefore may be missing from a clean installation.
try:
    from sklearn.utils.validation import validate_data as _sklearn_validate_data
except ImportError:
    _sklearn_validate_data = None

# True when the installed scikit-learn provides ``validate_data`` (public since 1.6).
SKLEARN_V1_6_OR_LATER = _sklearn_validate_data is not None


def validate_data(estimator, X, **kwargs: Any):
    """Validate ``X`` for ``estimator`` on both old and new scikit-learn releases.

    When scikit-learn provides ``sklearn.utils.validation.validate_data`` the
    call is delegated to it; otherwise it falls back to ``check_array``.

    Parameters
    ----------
    estimator : estimator instance
        The estimator on whose behalf the input is validated.
    X : array-like
        The input data to validate.
    **kwargs : Any
        Extra validation options forwarded to the underlying scikit-learn
        function. ``reset`` is dropped on the ``check_array`` fallback, which
        does not accept it. ``force_all_finite`` and ``ensure_all_finite`` are
        treated as aliases: whichever spelling the installed scikit-learn
        expects is the one that gets forwarded.

    Returns
    -------
    The validated (and possibly converted) input, as returned by scikit-learn.
    """
    if _sklearn_validate_data is not None:
        # scikit-learn 1.6 renamed ``force_all_finite`` to ``ensure_all_finite``
        # and emits a FutureWarning for the old spelling; translate it so
        # callers written against the old name stay warning-free.
        if "force_all_finite" in kwargs:
            legacy_value = kwargs.pop("force_all_finite")
            kwargs.setdefault("ensure_all_finite", legacy_value)
        return _sklearn_validate_data(estimator, X, **kwargs)

    # Older scikit-learn: ``check_array`` knows neither ``reset`` nor the new
    # ``ensure_all_finite`` keyword.
    kwargs.pop("reset", None)
    if "ensure_all_finite" in kwargs:
        modern_value = kwargs.pop("ensure_all_finite")
        kwargs.setdefault("force_all_finite", modern_value)
    return check_array(X, estimator=estimator, **kwargs)
25 changes: 17 additions & 8 deletions wnb/gnb.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from scipy.special import logsumexp
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import DataConversionWarning
from sklearn.utils import as_float_array, check_array
from sklearn.utils import as_float_array
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted

Expand All @@ -20,10 +20,11 @@
from typing_extensions import Self

from wnb.stats import Distribution, NonNumericDistributions
from wnb.stats._utils import get_dist_class, is_dist_supported
from wnb.stats.base import DistMixin
from wnb.stats.typing import DistributionLike
from wnb.stats.utils import get_dist_class, is_dist_supported

from ._utils import SKLEARN_V1_6_OR_LATER, validate_data
from .typing import ArrayLike, Float, MatrixLike

__all__ = ["GeneralNB"]
Expand Down Expand Up @@ -83,6 +84,13 @@ def __init__(
self.distributions = distributions
self.alpha = alpha

if SKLEARN_V1_6_OR_LATER:

def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
tags.target_tags.required = True
return tags

def _more_tags(self) -> dict[str, bool]:
return {"requires_y": True}

Expand All @@ -101,8 +109,9 @@ def _check_inputs(self, X, y) -> None:
if self.n_classes_ == 1:
raise ValueError("Classifier can't train when only one class is present")

X = check_array(
array=X,
X = validate_data(
self,
X,
accept_sparse=False,
accept_large_sparse=False,
dtype=(
Expand All @@ -112,7 +121,6 @@ def _check_inputs(self, X, y) -> None:
ensure_2d=True,
ensure_min_samples=1,
ensure_min_features=1,
estimator=self,
)

# Check if X contains complex values
Expand Down Expand Up @@ -282,14 +290,15 @@ def predict_log_proba(self, X: MatrixLike) -> np.ndarray:
check_is_fitted(self)

# Input validation
X = check_array(
array=X,
X = validate_data(
self,
X,
accept_large_sparse=False,
force_all_finite=True,
dtype=(
None if any(d in self._get_distributions() for d in NonNumericDistributions) else "numeric"
),
estimator=self,
reset=False,
)

# Check if the number of input features matches the data seen during fit
Expand Down
28 changes: 20 additions & 8 deletions wnb/gwnb.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from scipy.stats import norm
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import DataConversionWarning
from sklearn.utils import as_float_array, check_array, deprecated
from sklearn.utils import as_float_array, deprecated
from sklearn.utils.multiclass import check_classification_targets, type_of_target
from sklearn.utils.validation import check_is_fitted

Expand All @@ -21,6 +21,7 @@
else:
from typing_extensions import Self

from ._utils import SKLEARN_V1_6_OR_LATER, validate_data
from .typing import ArrayLike, Float, Int, MatrixLike

__all__ = ["GaussianWNB"]
Expand Down Expand Up @@ -111,6 +112,14 @@ def __init__(
self.C = C
self.learning_hist = learning_hist

if SKLEARN_V1_6_OR_LATER:

def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
tags.target_tags.required = True
tags.classifier_tags.multi_class = False
return tags

def _more_tags(self) -> dict[str, bool]:
return {"binary_only": True, "requires_y": True}

Expand All @@ -119,24 +128,27 @@ def _check_inputs(self, X, y) -> None:
check_classification_targets(y)

# Check that the dataset has only two unique labels
if type_of_target(y) != "binary":
warnings.warn("This version of MLD-WNB only supports binary classification.")
raise ValueError("Unknown label type: non-binary")
if (y_type := type_of_target(y)) != "binary":
if SKLEARN_V1_6_OR_LATER:
msg = f"Only binary classification is supported. The type of the target is {y_type}."
else:
msg = "Unknown label type: non-binary"
raise ValueError(msg)

# Check if only one class is present in label vector
if self.n_classes_ == 1:
raise ValueError("Classifier can't train when only one class is present.")

X = check_array(
array=X,
X = validate_data(
self,
X,
accept_sparse=False,
accept_large_sparse=False,
dtype="numeric",
force_all_finite=True,
ensure_2d=True,
ensure_min_samples=1,
ensure_min_features=1,
estimator=self,
)

# Check if X contains complex values
Expand Down Expand Up @@ -416,7 +428,7 @@ def predict_log_proba(self, X: MatrixLike) -> np.ndarray:
check_is_fitted(self)

# Input validation
X = check_array(array=X, accept_large_sparse=False, force_all_finite=True, estimator=self)
X = validate_data(self, X, accept_large_sparse=False, force_all_finite=True, reset=False)

# Check if the number of input features matches the data seen during fit
if X.shape[1] != self.n_features_in_:
Expand Down
2 changes: 0 additions & 2 deletions wnb/stats/utils.py → wnb/stats/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from .enums import Distribution
from .typing import DistributionLike

__all__ = ["is_dist_supported", "get_dist_class"]


def is_dist_supported(dist: DistributionLike) -> bool:
with contextlib.suppress(TypeError):
Expand Down
3 changes: 2 additions & 1 deletion wnb/stats/discrete.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from math import factorial
from typing import Any, Mapping

import numpy as np
Expand Down Expand Up @@ -82,7 +83,7 @@ def from_data(cls, data, **kwargs: Any) -> "PoissonDist":

def pmf(self, x: int) -> float:
return (
(np.exp(-self.rate) * self.rate**x) / np.math.factorial(x)
(np.exp(-self.rate) * self.rate**x) / factorial(x)
if x >= self._support[0] and x - int(x) == 0
else 0.0
)

0 comments on commit 9455be8

Please sign in to comment.