Skip to content

Commit

Permalink
Added 'SelectFirstTransformer' transformation type
Browse files Browse the repository at this point in the history
  • Loading branch information
vruusmann committed Sep 23, 2023
1 parent ec4c80c commit 132bf49
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 14 deletions.
26 changes: 13 additions & 13 deletions sklearn2pmml/ensemble/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,19 @@ def predict_proba(self, X, **predict_proba_params):
result.append(proba[i - 1] - proba[i])
return numpy.asarray(result).T

def _to_sparse(X, step_mask, step_result):
    """Scatter the result of one step into an array spanning all rows of X.

    Parameters
    ----------
    X : object exposing ``shape``; only ``X.shape[0]`` (the row count) is used.
    step_mask : boolean array, flattened with ``ravel()`` before use, that
        marks the rows of X which ``step_result`` belongs to.
    step_result : one- or two-dimensional array holding the step's output
        for the masked rows.

    Returns
    -------
    A ``dtype = object`` array with ``X.shape[0]`` rows and the same number
    of dimensions as ``step_result``. Masked rows hold the step's values;
    the remaining rows keep the ``None`` filler of a freshly allocated
    object array.
    """
    is_vector = (len(step_result.shape) == 1)
    # Allocate an output of matching dimensionality
    if is_vector:
        out_shape = (X.shape[0], )
    else:
        out_shape = (X.shape[0], step_result.shape[1])
    scattered = numpy.empty(out_shape, dtype = object)
    # Write the step's rows into their original positions
    if is_vector:
        target = step_mask.ravel()
    else:
        target = (step_mask.ravel(), slice(None))
    scattered[target] = step_result
    return scattered

class _BaseEnsemble(_BaseComposition):

def __init__(self, steps, controller):
Expand Down Expand Up @@ -216,19 +229,6 @@ def _to_evaluation_dataset(self, X):
return self.controller.transform(X)
return X

def _to_sparse(X, step_mask, step_result):
    """Expand a per-step result to the full row count of X.

    ``step_result`` holds values only for the rows selected by ``step_mask``.
    The returned ``dtype = object`` array has ``X.shape[0]`` rows; the
    selected rows receive the step values and every other row keeps the
    ``None`` filler of a new object array. One-dimensional results stay
    one-dimensional, two-dimensional results keep their column count.
    """
    n_rows = X.shape[0]
    if len(step_result.shape) == 1:
        # Vector result: one value per selected row
        expanded = numpy.empty((n_rows, ), dtype = object)
        expanded[step_mask.ravel()] = step_result
        return expanded
    # Matrix result: one row of values per selected row
    expanded = numpy.empty((n_rows, step_result.shape[1]), dtype = object)
    expanded[step_mask.ravel(), :] = step_result
    return expanded

class Link(BaseEstimator):

def __init__(self, estimator, augment_funcs, prefit = False):
Expand Down
64 changes: 63 additions & 1 deletion sklearn2pmml/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from scipy.sparse import lil_matrix
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn2pmml.util import cast, dt_transform, ensure_1d, ensure_def, eval_rows, to_expr_func, Expression
from sklearn2pmml.util import cast, dt_transform, ensure_1d, ensure_def, eval_rows, eval_expr_rows, to_expr_func, Expression, Predicate

import numpy
import pandas
Expand Down Expand Up @@ -606,3 +606,65 @@ def transform(self, X):
if self.trim_blanks:
Xt = numpy.char.strip(Xt)
return Xt

def _to_sparse(X, step_mask, step_result):
    """Scatter the output of one step into an array spanning all rows of X.

    Parameters
    ----------
    X : object exposing ``shape``; only ``X.shape[0]`` (the row count) is used.
    step_mask : boolean array, flattened with ``ravel()`` before use, that
        marks the rows of X which ``step_result`` belongs to.
    step_result : one- or two-dimensional array holding the step's output
        for the masked rows.

    Returns
    -------
    A ``dtype = object`` array with ``X.shape[0]`` rows and the same number
    of dimensions as ``step_result``. Masked rows hold the step's values;
    the remaining rows keep the ``None`` filler of a freshly allocated
    object array.
    """
    row_mask = step_mask.ravel()
    if len(step_result.shape) == 1:
        # One-dimensional results have no shape[1]; keep them one-dimensional
        # instead of failing with an IndexError.
        result = numpy.empty((X.shape[0], ), dtype = object)
        result[row_mask] = step_result
    else:
        result = numpy.empty((X.shape[0], step_result.shape[1]), dtype = object)
        result[row_mask, :] = step_result
    return result

class SelectFirstTransformer(BaseEstimator, TransformerMixin):
    """Apply, to each row, the transformer of the first step whose predicate selects it.

    Parameters
    ----------
    steps : iterable of ``(name, transformer, predicate)`` tuples. Each
        predicate must be a string or a ``Predicate`` object.
    controller : optional object with a ``transform`` method. When given,
        predicates are evaluated against ``controller.transform(X)`` instead
        of against ``X`` itself; the step transformers still receive rows
        of ``X``.

    Raises
    ------
    TypeError
        If a step is not a three-element tuple, if a predicate has an
        unsupported type, or if the controller lacks a ``transform`` method.
    """

    def __init__(self, steps, controller = None):
        for step in steps:
            if type(step) is not tuple:
                raise TypeError("Step is not a tuple")
            if len(step) != 3:
                raise TypeError("Step is not a three-element (name, transformer, predicate) tuple")
            predicate = step[2]
            if not isinstance(predicate, (str, Predicate)):
                raise TypeError("Step predicate is not a string or a Predicate object")
        self.steps = steps
        # Test against None (not truthiness) so that this check agrees with
        # _to_evaluation_dataset(), which calls transform() on any non-None controller.
        if controller is not None:
            if not hasattr(controller, "transform"):
                raise TypeError("Controller does not have a transform method")
        self.controller = controller

    def _to_evaluation_dataset(self, X):
        """Return the dataset that predicates are evaluated against."""
        if self.controller is not None:
            return self.controller.transform(X)
        return X

    def fit(self, X, y = None):
        """Fit every step's transformer on the rows that its predicate selects first.

        Rows already taken by an earlier step are excluded from later steps.

        Raises
        ------
        ValueError
            If a step ends up with no rows; the exception argument is the
            step's predicate.
        """
        X_eval = self._to_evaluation_dataset(X)
        # Rows that have been claimed by some earlier step
        mask = numpy.zeros(X.shape[0], dtype = bool)
        for name, transformer, predicate in self.steps:
            step_mask = eval_expr_rows(X_eval, predicate, dtype = bool)
            # First match wins: drop rows that an earlier step has claimed
            step_mask[mask] = False
            if numpy.sum(step_mask) < 1:
                raise ValueError(predicate)
            step_X = X[step_mask]
            step_y = y[step_mask] if y is not None else None
            transformer.fit(step_X, step_y)
            mask = numpy.logical_or(mask, step_mask)
        return self

    def transform(self, X):
        """Transform every row using the first step whose predicate selects it.

        Steps that select no (remaining) rows are skipped. Each step's output
        is expanded to the full row count by ``_to_sparse`` and merged into
        one result array; rows selected by no step keep whatever filler
        ``_to_sparse`` left in the first contributing step's array.

        Returns
        -------
        The merged result array, or ``None`` if no step selected any row.
        """
        result = None
        X_eval = self._to_evaluation_dataset(X)
        # Rows that have been claimed by some earlier step
        mask = numpy.zeros(X.shape[0], dtype = bool)
        for name, transformer, predicate in self.steps:
            step_mask = eval_expr_rows(X_eval, predicate, dtype = bool)
            # First match wins: drop rows that an earlier step has claimed
            step_mask[mask] = False
            if numpy.sum(step_mask) < 1:
                continue
            step_X = X[step_mask]
            step_result = transformer.transform(step_X)
            step_result = _to_sparse(X, step_mask, step_result)
            if result is None:
                result = step_result
            else:
                # Copy only this step's rows into the accumulated result
                result[step_mask] = step_result[step_mask]
            mask = numpy.logical_or(mask, step_mask)
        return result

0 comments on commit 132bf49

Please sign in to comment.