From 4941ef93a9b12da1895517f5d7316576ed7528fe Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Fri, 5 Jan 2024 09:17:12 +0200 Subject: [PATCH] Excluded the missing value replacement value from the list of valid values --- sklearn2pmml/decoration/__init__.py | 6 ------ sklearn2pmml/decoration/tests/__init__.py | 8 ++++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/sklearn2pmml/decoration/__init__.py b/sklearn2pmml/decoration/__init__.py index 21b86be..e593e81 100644 --- a/sklearn2pmml/decoration/__init__.py +++ b/sklearn2pmml/decoration/__init__.py @@ -218,10 +218,6 @@ def fit(self, X, y = None): data_values = self.dtype_.categories else: data_values = numpy.unique(X[nonmissing_mask]) - if (self.missing_value_replacement is not None) and numpy.any(missing_mask) > 0: - if _is_pandas_categorical(self.dtype_): - raise ValueError() - data_values = numpy.unique(numpy.append(data_values, self.missing_value_replacement)) else: data_values = numpy.asarray(self.data_values) self.data_values_ = data_values @@ -236,8 +232,6 @@ def fit(self, X, y = None): col_missing_mask = missing_mask[:, col] col_nonmissing_mask = nonmissing_mask[:, col] data_values = numpy.unique(col_X[col_nonmissing_mask]) - if (self.missing_value_replacement is not None) and numpy.any(col_missing_mask) > 0: - data_values = numpy.unique(numpy.append(data_values, self.missing_value_replacement)) else: data_values = numpy.asarray(self.data_values[col]) self.data_values_.append(data_values) diff --git a/sklearn2pmml/decoration/tests/__init__.py b/sklearn2pmml/decoration/tests/__init__.py index 2f81187..7706ee6 100644 --- a/sklearn2pmml/decoration/tests/__init__.py +++ b/sklearn2pmml/decoration/tests/__init__.py @@ -107,8 +107,8 @@ def test_fit_int_missing(self): Xt = domain.fit_transform(X) self.assertIsInstance(Xt, DataFrame) self.assertEqual(2, len(domain.data_values_)) - self.assertEqual([0, 1, 2, 3], domain.data_values_[0].tolist()) - self.assertEqual([0, 1, 2], domain.data_values_[1].tolist()) + self.assertEqual([1, 2, 3], domain.data_values_[0].tolist()) + self.assertEqual([1, 2], domain.data_values_[1].tolist()) self.assertEqual(2, len(domain.counts_)) self.assertEqual({"totalFreq" : 6, "missingFreq" : 2, "invalidFreq" : 0}, domain.counts_[0]) self.assertEqual({"totalFreq" : 6, "missingFreq" : 3, "invalidFreq" : 0}, domain.counts_[1]) @@ -182,8 +182,8 @@ def test_fit_string_missing(self): Xt = domain.fit_transform(X) self.assertIsInstance(Xt, DataFrame) self.assertEqual(2, len(domain.data_values_)) - self.assertEqual(["0", "1", "2", "3"], domain.data_values_[0].tolist()) - self.assertEqual(["0", "one", "three", "two"], domain.data_values_[1].tolist()) + self.assertEqual(["1", "2", "3"], domain.data_values_[0].tolist()) + self.assertEqual(["one", "three", "two"], domain.data_values_[1].tolist()) self.assertEqual(2, len(domain.counts_)) self.assertEqual({"totalFreq" : 6, "missingFreq" : 2, "invalidFreq" : 0}, domain.counts_[0]) self.assertEqual({"totalFreq" : 6, "missingFreq" : 3, "invalidFreq" : 0}, domain.counts_[1])