diff --git a/sklearn2pmml/decoration/__init__.py b/sklearn2pmml/decoration/__init__.py index fb9e251..00c83be 100644 --- a/sklearn2pmml/decoration/__init__.py +++ b/sklearn2pmml/decoration/__init__.py @@ -161,7 +161,8 @@ def _valid_value_mask(self, X, where): if hasattr(X, "isin"): mask = X.isin(self.data_) else: - mask = numpy.isin(X, self.data_) + mask = numpy.full(X.shape, fill_value = False) + mask[where] = numpy.isin(X[where], self.data_) return numpy.logical_and(mask, where) return super(DiscreteDomain, self)._valid_value_mask(X, where) diff --git a/sklearn2pmml/decoration/tests/__init__.py b/sklearn2pmml/decoration/tests/__init__.py index 198eadd..a772346 100644 --- a/sklearn2pmml/decoration/tests/__init__.py +++ b/sklearn2pmml/decoration/tests/__init__.py @@ -11,6 +11,7 @@ from unittest import TestCase import numpy +import pandas class AliasTest(TestCase): @@ -113,6 +114,20 @@ def test_fit_int_missing(self): Xt = domain.transform(X) self.assertEqual([0, 0, 2], Xt.tolist()) + def test_fit_int64(self): + domain = clone(CategoricalDomain()) + X = Series([-1, None, 1, 2, -1]).astype("Int64") + self.assertEqual([False, True, False, False, False], domain._missing_value_mask(X).tolist()) + Xt = domain.fit_transform(X) + self.assertEqual([-1, 1, 2], domain.data_.tolist()) + self.assertEqual([-1, pandas.NA, 1, 2, -1], Xt.tolist()) + domain = clone(CategoricalDomain()) + X = X.to_numpy() + self.assertEqual([False, True, False, False, False], domain._missing_value_mask(X).tolist()) + Xt = domain.fit_transform(X) + self.assertEqual([-1, 1, 2], domain.data_.tolist()) + self.assertEqual([-1, pandas.NA, 1, 2, -1], Xt.tolist()) + def test_fit_int_categorical(self): domain = clone(CategoricalDomain(dtype = CategoricalDtype())) self.assertIsNone(domain.dtype.categories)