From c6f84a7668dce90c753a34c296ce1a7861e92d9c Mon Sep 17 00:00:00 2001
From: Lucio Anderlini
Date: Fri, 19 Nov 2021 19:04:21 +0100
Subject: [PATCH 1/3] Drafted ColumnTransformer

---
 README.md                                 |   2 +
 scikinC/ColumnTransformerConverter.py     | 140 ++++++++++++++++++++++
 scikinC/FunctionTransformerConverter.py   |  97 +++++++++++++++
 scikinC/QuantileTransformerConverter.py   |  35 +-----
 scikinC/__init__.py                       |   2 +
 scikinC/_tools.py                         |  31 +++++
 test/test_ColumnTransformerConverter.py   | 116 ++++++++++++++++++
 test/test_FunctionTransformerConverter.py |  75 ++++++++++++
 8 files changed, 468 insertions(+), 30 deletions(-)
 create mode 100644 scikinC/ColumnTransformerConverter.py
 create mode 100644 scikinC/FunctionTransformerConverter.py
 create mode 100644 test/test_ColumnTransformerConverter.py
 create mode 100644 test/test_FunctionTransformerConverter.py

diff --git a/README.md b/README.md
index bc62d03..87d9905 100644
--- a/README.md
+++ b/README.md
@@ -175,6 +175,8 @@ A few notes:
 | `MinMaxScaler` | Available | Available | |
 | `StandardScaler` | Available | Available | |
 | `QuantileTransformer` | Available | Available | |
+| `FunctionTransformer` | Available | Available | Only functions in math.h |
+| `ColumnTransformer` | Available | Available | Only integer column indices |
 | `Pipeline` | Available | Partial | Pipelines of pipelines break |
 
 #### Scikit-Learn models
diff --git a/scikinC/ColumnTransformerConverter.py b/scikinC/ColumnTransformerConverter.py
new file mode 100644
index 0000000..2c62702
--- /dev/null
+++ b/scikinC/ColumnTransformerConverter.py
@@ -0,0 +1,140 @@
+import numpy as np
+
+from sklearn.preprocessing import FunctionTransformer
+
+import scikinC
+from scikinC import BaseConverter
+from ._tools import array2c
+
+import sys
+
+
+class ColumnTransformerConverter (BaseConverter):
+    def convert(self, model, name=None):
+        lines = self.header()
+
+        index_mapping = []
+        keys = []
+        transformers = []
+        for key, transformer, columns in model.transformers_:
+            if transformer == 'drop':
+                continue
+
+            if not all([isinstance(c, int) for c in columns]):
+                raise NotImplementedError ("Columns can only be indexed with integers")
+
+            index_mapping += columns
+
+            if key is None:
+                key = "Preprocessor"
+            if key in keys:
+                key += str(1+len(keys))
+            keys.append (key)
+            if isinstance(transformer, (FunctionTransformer,)):
+                transformer.n_features_in_ = len(columns)
+
+            transformers.append (('colcnv_%s_%s' % (name, key), transformer, columns))
+
+
+        if len([t for _, t, _ in transformers if t != 'passthrough']):
+            lines.append(
+                scikinC.convert({k: t for k,t,_ in transformers if t != 'passthrough'})
+                )
+
+        mapping = {k: c for k,_,c in transformers}
+
+        nFeatures = 1+max(index_mapping)
+
+        lines.append("""
+        extern "C"
+        FLOAT_T* %(name)s (FLOAT_T* ret, const FLOAT_T *input)
+        {
+          int c;
+          FLOAT_T bufin[%(nFeatures)d], bufout[%(nFeatures)s];
+
+        """ % dict(
+          name=name,
+          nFeatures=nFeatures,
+          )
+        )
+
+        for key, transformer, columns in transformers:
+            lines.append("// Transforming %s columns" % key)
+            if transformer == 'passthrough':
+                for column in columns:
+                    lines.append("""
+          ret [%(output)d] = input[%(column)d];
+                    """%dict(output=index_mapping.index(column), column=column))
+            else:
+                for iCol, column in enumerate(columns):
+                    lines.append("""
+          bufin [%(iCol)d] = input[%(column)d];
+                    """%dict(iCol=iCol, column=column))
+                lines.append ("""
+          %(name)s (bufout, bufin);
+                """ % dict(name=key))
+                for iCol, column in enumerate(columns):
+                    lines.append("""
+          ret[%(index_out)d] = bufout[%(iCol)d];
+                    """ % dict(index_out=index_mapping.index(column), iCol=iCol))
+
+        lines.append ("""
+          return ret;
+        }
+        """)
+
+        ## Check for not-invertible models
+        ## Any dropped columns?
+        if any([t == 'drop' for _, t, _ in model.transformers_]):
+            return "\n".join(lines)
+
+        ## Any columns appearing twice?
+        if any([index_mapping.count(c)>1 for c in index_mapping]):
+            return "\n".join(lines)
+
+        ## Any transformer not implementing an inverse transform?
+        if not all([t == 'passthrough' or hasattr(t, 'inverse_transform') for _,t,_ in transformers]):
+            return "\n".join(lines)
+
+        index_mapping = [index_mapping.index(c) for c in range(len(index_mapping))]
+
+        lines.append("""
+        extern "C"
+        FLOAT_T* %(name)s_inverse (FLOAT_T* ret, const FLOAT_T *input)
+        {
+          int c;
+          FLOAT_T bufin[%(nFeatures)d], bufout[%(nFeatures)s];
+
+        """ % dict(
+          name=name,
+          nFeatures=nFeatures,
+          )
+        )
+
+        for key, transformer, columns in transformers:
+            lines.append("// Transforming %s columns" % key)
+            if transformer == 'passthrough':
+                for column in columns:
+                    lines.append("""
+          ret [%(output)d] = input[%(column)d];
+                    """%dict(output=index_mapping.index(column), column=column))
+            else:
+                for iCol, column in enumerate(columns):
+                    lines.append("""
+          bufin [%(iCol)d] = input[%(column)d];
+                    """%dict(iCol=iCol, column=column))
+                lines.append ("""
+          %(name)s_inverse (bufout, bufin);
+                """ % dict(name=key))
+                for iCol, column in enumerate(columns):
+                    lines.append("""
+          ret[%(index_out)d] = bufout[%(iCol)d];
+                    """ % dict(index_out=index_mapping.index(column), iCol=iCol))
+
+        lines.append ("""
+          return ret;
+        }
+        """)
+
+        return "\n".join(lines)
+
diff --git a/scikinC/FunctionTransformerConverter.py b/scikinC/FunctionTransformerConverter.py
new file mode 100644
index 0000000..74f548e
--- /dev/null
+++ b/scikinC/FunctionTransformerConverter.py
@@ -0,0 +1,97 @@
+import numpy as np
+
+from scikinC import BaseConverter
+from ._tools import array2c
+
+
+class FunctionTransformerConverter (BaseConverter):
+    def convert(self, model, name=None):
+        lines = self.header()
+
+        if not hasattr(model, 'n_features_in_'):
+            raise NotImplementedError(
+                "Conversion requires its n_features_in_ attribute to be set")
+
+        nFeatures = model.n_features_in_
+
+        func_dict = {
+            None: '{x}',
+            np.log1p: 'log(1+{x})',
+            np.expm1: 'exp({x})-1',
+            np.arcsin: 'asin({x})',
+            np.arccos: 'acos({x})',
+            np.arctan: 'atan({x})',
+            np.abs: 'fabs({x})',
+            }
+
+        if model.func is not None or model.inverse_func is not None:
+            lines.append("#include <math.h>")
+
+        c_funcs = ('sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'exp', 'log', 'log10', 'sqrt', 'ceil', 'floor')
+        func_dict.update({getattr(np, f): "%s({x})"%f for f in c_funcs})
+
+        if hasattr(model, 'func_inC'):
+            fwd = model.func_inC
+        elif model.func in func_dict.keys():
+            fwd = func_dict[model.func]
+        else:
+            raise NotImplementedError(
+                "Translation of function %s not implemented nor defined as func_inC argument"
+                % str(model.func))
+
+
+        if hasattr(model, 'inverse_func_inC'):
+            bwd = model.inverse_func_inC
+        elif model.inverse_func in func_dict.keys():
+            bwd = func_dict[model.inverse_func]
+        else:
+            raise NotImplementedError(
+                "Translation of function %s not implemented nor defined as inverse_func_inC argument"
+                % str(model.inverse_func))
+
+
+        ## Input sanitization
+        if any([banned in fwd for banned in (';', '//', '/*', '*/')]):
+            raise ValueError("Invalid implementation: %s" % fwd);
+        if any([banned in bwd for banned in (';', '//', '/*', '*/')]):
+            raise ValueError("Invalid implementation: %s" % bwd);
+
+
+        lines.append("""
+        extern "C"
+        FLOAT_T* %(name)s (FLOAT_T* ret, const FLOAT_T *input)
+        {
+          int c;
+
+          for (c = 0; c < %(nFeatures)d; ++c)
+            ret [c] = %(func)s;
+
+          return ret;
+        }
+        """ % dict(
+          name=name,
+          nFeatures=nFeatures,
+          func=fwd.format(x='input[c]'),
+          )
+        )
+
+        lines.append ( """
+        extern "C"
+        FLOAT_T * %(name)s_inverse(FLOAT_T * ret, const FLOAT_T * input)
+        {
+          int c;
+
+          for (c=0; c < %(nFeatures)d; ++c)
+            ret [c]= %(func)s;
+
+          return ret;
+        }
+        """ % dict (
+          name=name,
+          nFeatures = nFeatures,
+          func=bwd.format(x='input[c]'),
+          )
+        )
+
+
+        return "\n".join(lines)
diff --git a/scikinC/QuantileTransformerConverter.py b/scikinC/QuantileTransformerConverter.py
index cb97fbb..1c2ac4a 100644
--- a/scikinC/QuantileTransformerConverter.py
+++ b/scikinC/QuantileTransformerConverter.py
@@ -2,44 +2,19 @@ import sys
 
 from scikinC import BaseConverter
 from scipy import stats
-from ._tools import array2c
+from ._tools import array2c, get_interpolation_function
 
 class QuantileTransformerConverter (BaseConverter):
-    def convert (self, model, name = None):
+    def convert (self, model, name=None):
         lines = self.header()
 
         distr = model.output_distribution
         if distr not in ['normal', 'uniform']:
             raise NotImplementedError ("Unexpected distribution %s" % distr)
 
-        lines . append ( """
-        extern "C"
-        FLOAT_T qtc_interpolate_for_%(name)s ( FLOAT_T x, FLOAT_T *xs, FLOAT_T *ys, int N )
-        {
-          int min = 0;
-          int max = N;
-          int n;
-
-          if (N<=1) return ys[0];
-
-          if (x <= xs[0]) return ys[0];
-          if (x >= xs[N-1]) return ys[N-1];
-
-
-          for (;;)
-          {
-            n = (min + max)/2;
-            if ( x < xs[n] )
-              max = n;
-            else if ( x >= xs[n+1] )
-              min = n;
-            else
-              break;
-          }
-
-          return (x - xs[n])/(xs[n+1]-xs[n])*(ys[n+1]-ys[n]) + ys[n];
-        }
-        """ % dict(name = name));
+        lines.append (
+            get_interpolation_function('qtc_interpolate_for_%s'%(name))
+        )
 
         q = model.quantiles_
         nQuantiles = model.quantiles_.shape[0]
diff --git a/scikinC/__init__.py b/scikinC/__init__.py
index 47f5d4b..ab8008f 100644
--- a/scikinC/__init__.py
+++ b/scikinC/__init__.py
@@ -16,6 +16,8 @@
     'DecorrTransformer': 'DecorrTransformerConverter',
     'Pipeline': 'PipelineConverter',
     'FastQuantileLayer': 'FastQuantileLayerConverter',
+    'FunctionTransformer': 'FunctionTransformerConverter',
+    'ColumnTransformer': 'ColumnTransformerConverter',
 
     ## Keras
     'Sequential': 'KerasSequentialConverter',
diff --git a/scikinC/_tools.py b/scikinC/_tools.py
index 4754ccc..fd3e311 100644
--- a/scikinC/_tools.py
+++ b/scikinC/_tools.py
@@ -53,3 +53,34 @@ def retrieve_prior (bdt):
     )
 
 
+################################################################################
+def get_interpolation_function (func_name):
+    return """
+    extern "C"
+    FLOAT_T %(func_name)s ( FLOAT_T x, FLOAT_T *xs, FLOAT_T *ys, int N )
+    {
+      int min = 0;
+      int max = N;
+      int n;
+
+      if (N<=1) return ys[0];
+
+      if (x <= xs[0]) return ys[0];
+      if (x >= xs[N-1]) return ys[N-1];
+
+
+      for (;;)
+      {
+        n = (min + max)/2;
+        if ( x < xs[n] )
+          max = n;
+        else if ( x >= xs[n+1] )
+          min = n;
+        else
+          break;
+      }
+
+      return (x - xs[n])/(xs[n+1]-xs[n])*(ys[n+1]-ys[n]) + ys[n];
+    }
+    """ % dict(func_name=func_name);
+
diff --git a/test/test_ColumnTransformerConverter.py b/test/test_ColumnTransformerConverter.py
new file mode 100644
index 0000000..db3a61b
--- /dev/null
+++ b/test/test_ColumnTransformerConverter.py
@@ -0,0 +1,116 @@
+import numpy as np
+from sklearn.preprocessing import FunctionTransformer, QuantileTransformer
+from sklearn.compose import ColumnTransformer
+
+# PyTest testing infrastructure
+import pytest
+
+# Local testing infrastructure
+from wrap import deploy_pickle
+
+################################################################################
+## Test preparation
+@pytest.fixture
+def passthrough_transformer():
+    transformer_ = ColumnTransformer([], remainder='passthrough')
+    X = np.random.uniform (20,30,(1000, 10))
+    transformer_.fit (X)
+    return transformer_
+
+
+@pytest.fixture
+def double_passthrough_transformer():
+    transformer_ = ColumnTransformer([
+        ('keep1', 'passthrough', [0,1]),
+        ('keep2', 'passthrough', [3,4]),
+        ])
+    X = np.random.uniform (20,30,(1000, 10))
+    transformer_.fit (X)
+    return transformer_
+
+@pytest.fixture
+def qt_and_passthrough_transformer():
+    transformer_ = ColumnTransformer([
+        ('qt', QuantileTransformer(output_distribution='normal'), [0,1]),
+        ], remainder='passthrough')
+    X = np.random.uniform (20,30,(1000, 10))
+    transformer_.fit (X)
+    return transformer_
+
+
+@pytest.fixture
+def qt_and_ft_transformer_only():
+    transformer_ = ColumnTransformer([
+        ('qt', QuantileTransformer(output_distribution='normal'), [0,1,2,3,4]),
+        ('ft', FunctionTransformer(), [5,6,7,8,9]),
+        ])
+    X = np.random.uniform (20,30,(1000, 10))
+    transformer_.fit (X)
+    return transformer_
+
+
+@pytest.fixture
+def qt_and_ft_transformer_dropping():
+    transformer_ = ColumnTransformer([
+        ('qt', QuantileTransformer(output_distribution='normal'), [0,1]),
+        ('ft', FunctionTransformer(), [3,4]),
+        ], remainder='drop')
+    X = np.random.uniform (20,30,(1000, 10))
+    transformer_.fit (X)
+    return transformer_
+
+
+
+transformers = [
+    'passthrough_transformer',
+    'double_passthrough_transformer',
+    'qt_and_passthrough_transformer',
+    'qt_and_ft_transformer_only',
+    'qt_and_ft_transformer_dropping',
+    ]
+
+invertible_transformers = [
+    'passthrough_transformer',
+    'qt_and_passthrough_transformer',
+    'qt_and_ft_transformer_only',
+    ]
+
+
+################################################################################
+## Real tests
+@pytest.mark.parametrize ('scaler', transformers)
+def test_forward (scaler, request):
+    scaler = request.getfixturevalue(scaler)
+    deployed = deploy_pickle("functiontransformer", scaler)
+    xtest = np.random.uniform (21,29, 10)
+    py = scaler.transform (xtest[None])
+    print (py.shape)
+    c = deployed.transform (py.shape[1], xtest)
+    print (xtest, "->", c, " instead of: ", py)
+    assert np.abs(py-c).max() < 1e-4
+
+
+@pytest.mark.parametrize ('scaler', invertible_transformers)
+def test_inverse (scaler, request):
+    scaler = request.getfixturevalue(scaler)
+    deployed = deploy_pickle("function_transformer", scaler)
+    xtest = np.random.uniform (0,1, 10)
+    py = np.empty (10)
+    counter = 0
+    for _, transform, columns in scaler.transformers_:
+        inputs = xtest[counter:counter+len(columns)]
+        counter += len(columns)
+        if transform == 'passthrough':
+            py[columns] = inputs
+        else:
+            py[columns] = transform.inverse_transform ([inputs])[0]
+
+    c = deployed.transform_inverse (len(py), xtest)
+
+    print (np.c_ [xtest, c, py])
+    assert np.abs(py-c).max() < 1e-4
+
+
+
+
+
diff --git a/test/test_FunctionTransformerConverter.py b/test/test_FunctionTransformerConverter.py
new file mode 100644
index 0000000..5a83976
--- /dev/null
+++ b/test/test_FunctionTransformerConverter.py
@@ -0,0 +1,75 @@
+import numpy as np
+from sklearn.preprocessing import FunctionTransformer
+
+# PyTest testing infrastructure
+import pytest
+
+# Local testing infrastructure
+from wrap import deploy_pickle
+
+################################################################################
+## Test preparation
+@pytest.fixture
+def empty_transformer():
+    transformer_ = FunctionTransformer(validate=True)
+    X = np.random.uniform (20,30,(1000, 10))
+    transformer_.fit (X)
+    return transformer_
+
+@pytest.fixture
+def log_transformer():
+    transformer_ = FunctionTransformer(np.log, np.exp, validate=True)
+    X = np.random.uniform (20,30,(1000, 10))
+    transformer_.fit (X)
+    return transformer_
+
+@pytest.fixture
+def custom_transformer():
+    transformer_ = FunctionTransformer(np.square, np.sqrt, validate=True)
+    transformer_.func_inC = 'pow({x}, 2)'
+    X = np.random.uniform (20,30,(1000, 10))
+    transformer_.fit (X)
+    return transformer_
+
+
+@pytest.fixture
+def empty_transformer_wo_fit():
+    transformer_ = FunctionTransformer()
+    transformer_.n_features_in_ = 10
+    return transformer_
+
+
+
+scalers = [
+    'empty_transformer',
+    'log_transformer',
+    'custom_transformer',
+    'empty_transformer_wo_fit',
+    ]
+
+
+################################################################################
+## Real tests
+@pytest.mark.parametrize ('scaler', scalers)
+def test_forward (scaler, request):
+    scaler = request.getfixturevalue(scaler)
+    deployed = deploy_pickle("functiontransformer", scaler)
+    xtest = np.random.uniform (21,29, 10)
+    py = scaler.transform (xtest[None])
+    c = deployed.transform (10, xtest)
+    print (xtest, "->", c, " instead of: ", py)
+    assert np.abs(py-c).max() < 1e-4
+
+
+@pytest.mark.parametrize ('scaler', scalers)
+def test_inverse (scaler, request):
+    scaler = request.getfixturevalue(scaler)
+    deployed = deploy_pickle("function_transformer", scaler)
+    xtest = np.random.uniform (0,1, 10)
+    py = scaler.inverse_transform (xtest[None])
+    c = deployed.transform_inverse (10, xtest)
+    assert np.abs(py-c).max() < 1e-4
+
+
+
+
From 4835946c39fde93a8932da2a4588950e6d2c3945 Mon Sep 17 00:00:00 2001
From: Lucio Anderlini
Date: Thu, 25 Nov 2021 12:31:41 +0100
Subject: [PATCH 2/3] fixes for making columntransformer compatible with @mbarbetti's stuff as for screen -r old

---
 scikinC/ColumnTransformerConverter.py | 43 +++++++++++++--------------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/scikinC/ColumnTransformerConverter.py b/scikinC/ColumnTransformerConverter.py
index 2c62702..20fc516 100644
--- a/scikinC/ColumnTransformerConverter.py
+++ b/scikinC/ColumnTransformerConverter.py
@@ -17,11 +17,13 @@ def convert(self, model, name=None):
         keys = []
         transformers = []
         for key, transformer, columns in model.transformers_:
-            if transformer == 'drop':
+            if transformer == 'drop' or len(columns) == 0:
                 continue
 
-            if not all([isinstance(c, int) for c in columns]):
-                raise NotImplementedError ("Columns can only be indexed with integers")
+            if not all([isinstance(c, int) or int(c) == c for c in columns]):
+
+                raise NotImplementedError ("Columns can only be indexed with integers, got",
+                        [type(c) for c in columns])
 
             index_mapping += columns
 
@@ -31,7 +33,10 @@ def convert(self, model, name=None):
                 key += str(1+len(keys))
             keys.append (key)
             if isinstance(transformer, (FunctionTransformer,)):
-                transformer.n_features_in_ = len(columns)
+                if transformer.func is None and transformer.inverse_func is None:
+                    transformer = 'passthrough'
+                else:
+                    transformer.n_features_in_ = len(columns)
 
             transformers.append (('colcnv_%s_%s' % (name, key), transformer, columns))
 
@@ -67,16 +72,13 @@ def convert(self, model, name=None):
                     """%dict(output=index_mapping.index(column), column=column))
             else:
                 for iCol, column in enumerate(columns):
-                    lines.append("""
-          bufin [%(iCol)d] = input[%(column)d];
-                    """%dict(iCol=iCol, column=column))
-                lines.append ("""
-          %(name)s (bufout, bufin);
-                """ % dict(name=key))
+                    lines.append("""          bufin [%(iCol)d] = input[%(column)d];"""%
+                            dict(iCol=iCol, column=column))
+                lines.append ("""          %(name)s (bufout, bufin);"""
+                        % dict(name=key))
                 for iCol, column in enumerate(columns):
-                    lines.append("""
-          ret[%(index_out)d] = bufout[%(iCol)d];
-                    """ % dict(index_out=index_mapping.index(column), iCol=iCol))
+                    lines.append("""          ret[%(index_out)d] = bufout[%(iCol)d];"""%
+                            dict(index_out=index_mapping.index(column), iCol=iCol))
 
         lines.append ("""
           return ret;
@@ -120,16 +122,13 @@ def convert(self, model, name=None):
                     """%dict(output=index_mapping.index(column), column=column))
             else:
                 for iCol, column in enumerate(columns):
-                    lines.append("""
-          bufin [%(iCol)d] = input[%(column)d];
-                    """%dict(iCol=iCol, column=column))
-                lines.append ("""
-          %(name)s_inverse (bufout, bufin);
-                """ % dict(name=key))
+                    lines.append("""          bufin [%(iCol)d] = input[%(column)d];"""%
+                            dict(iCol=iCol, column=column))
+                lines.append ("""          %(name)s_inverse (bufout, bufin);"""%
+                        dict(name=key))
                 for iCol, column in enumerate(columns):
-                    lines.append("""
-          ret[%(index_out)d] = bufout[%(iCol)d];
-                    """ % dict(index_out=index_mapping.index(column), iCol=iCol))
+                    lines.append("""          ret[%(index_out)d] = bufout[%(iCol)d]; """ %
+                            dict(index_out=index_mapping.index(column), iCol=iCol))
 
         lines.append ("""
           return ret;
From 1d7d8b1fc1084e92e656ed71d33935ad46f2e6eb Mon Sep 17 00:00:00 2001
From: Lucio Anderlini
Date: Fri, 3 Dec 2021 16:53:10 +0100
Subject: [PATCH 3/3] Updated minor version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 1a21412..b0b95c2 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
 
 setup(
     name='scikinC',  # Required
-    version='0.1.0',  # Required
+    version='0.2.0',  # Required
     description='A converter for scikit learn and keras to hardcoded C function',
     long_description=long_description,
     long_description_content_type='text/markdown',  # Optional (see note above)