diff --git a/skl2onnx/_supported_operators.py b/skl2onnx/_supported_operators.py index b4a5a7278..d609c9dc5 100644 --- a/skl2onnx/_supported_operators.py +++ b/skl2onnx/_supported_operators.py @@ -252,6 +252,7 @@ MaxAbsScaler, MinMaxScaler, PolynomialFeatures, + QuantileTransformer, RobustScaler, StandardScaler, ) @@ -436,6 +437,7 @@ def build_sklearn_operator_name_map(): PolynomialFeatures, PowerTransformer, QuadraticDiscriminantAnalysis, + QuantileTransformer, RadiusNeighborsClassifier, RadiusNeighborsRegressor, RandomForestClassifier, diff --git a/skl2onnx/operator_converters/__init__.py b/skl2onnx/operator_converters/__init__.py index bc3b04b89..94190dfe6 100644 --- a/skl2onnx/operator_converters/__init__.py +++ b/skl2onnx/operator_converters/__init__.py @@ -50,6 +50,7 @@ from . import polynomial_features from . import power_transformer from . import quadratic_discriminant_analysis +from . import quantile_transformer from . import random_forest from . import random_projection from . 
import random_trees_embedding @@ -116,6 +117,7 @@ polynomial_features, power_transformer, quadratic_discriminant_analysis, + quantile_transformer, random_forest, random_projection, random_trees_embedding, diff --git a/skl2onnx/operator_converters/quantile_transformer.py b/skl2onnx/operator_converters/quantile_transformer.py new file mode 100644 index 000000000..8b67811be --- /dev/null +++ b/skl2onnx/operator_converters/quantile_transformer.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +from ..common._registration import register_converter +from ..common._topology import Scope, Operator +from ..common._container import ModelComponentContainer +from ..common.data_types import guess_numpy_type + + +def convert_quantile_transformer( + scope: Scope, operator: Operator, container: ModelComponentContainer +): + """Converter for QuantileTransformer""" + # op_in = operator.inputs[0] + # op_out = operator.outputs[0].full_name + op = operator.raw_operator + # opv = container.target_opset + dtype = guess_numpy_type(operator.inputs[0].type) + if dtype != np.float64: + dtype = np.float32 + if op.output_distribution != "uniform": + raise RuntimeError( + "Conversion of QuantileTransformer with output_distribution=%r " + "is not supported." 
% op.output_distribution + ) + + # ref = op.references_ + # quantiles = op.quantiles_ + + # Code of QuantileTransformer.transform + # lower_bound_x = quantiles[0] + # upper_bound_x = quantiles[-1] + # lower_bound_y = 0 + # upper_bound_y = 1 + # lower_bounds_idx = (X_col == lower_bound_x) + # upper_bounds_idx = (X_col == upper_bound_x) + + # isfinite_mask = ~np.isnan(X_col) + # xcolf = X_col[isfinite_mask] + # X_col[isfinite_mask] = .5 * ( + # np.interp(xcolf, quantiles, self.references_) + # - np.interp(-xcolf, -quantiles[::-1], -self.references_[::-1])) + # X_col[upper_bounds_idx] = upper_bound_y + # X_col[lower_bounds_idx] = lower_bound_y + + # Strategy + # implement interpolation in Onnx + # * use 2 trees to determine the quantile x (qx, dx) + # * use 2 trees to determine the quantile y (qy, dy) + # do : (x - q) * dx * dy + qy + + # y.set_onnx_name_prefix('quantile') + # y.add_to(scope, container) + raise NotImplementedError() + + +register_converter("SklearnQuantileTransformer", convert_quantile_transformer) diff --git a/skl2onnx/shape_calculators/__init__.py b/skl2onnx/shape_calculators/__init__.py index 6a4fc36be..8f56823cd 100644 --- a/skl2onnx/shape_calculators/__init__.py +++ b/skl2onnx/shape_calculators/__init__.py @@ -39,6 +39,7 @@ from . import polynomial_features from . import power_transformer from . import quadratic_discriminant_analysis +from . import quantile_transformer from . import random_projection from . import random_trees_embedding from . 
import replace_op @@ -90,6 +91,7 @@ polynomial_features, power_transformer, quadratic_discriminant_analysis, + quantile_transformer, random_projection, random_trees_embedding, replace_op, diff --git a/skl2onnx/shape_calculators/quantile_transformer.py b/skl2onnx/shape_calculators/quantile_transformer.py new file mode 100644 index 000000000..c77aaeeee --- /dev/null +++ b/skl2onnx/shape_calculators/quantile_transformer.py @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: Apache-2.0 + +import copy +from ..common._registration import register_shape_calculator +from ..common.utils import check_input_and_output_numbers, check_input_and_output_types +from ..common.data_types import FloatTensorType, Int64TensorType, DoubleTensorType + + +def quantile_transformer_shape_calculator(operator): + """Shape calculator for QuantileTransformer""" + check_input_and_output_numbers(operator, output_count_range=1) + check_input_and_output_types( + operator, good_input_types=[FloatTensorType, Int64TensorType, DoubleTensorType] + ) + + N = operator.inputs[0].get_first_dimension() + model = operator.raw_operator + operator.outputs[0].type = copy.deepcopy(operator.inputs[0].type) + operator.outputs[0].type.shape = [N, model.quantiles_.shape[1]] + + +register_shape_calculator( + "SklearnQuantileTransformer", quantile_transformer_shape_calculator +) diff --git a/tests/test_sklearn_quantile_converter.py b/tests/test_sklearn_quantile_converter.py new file mode 100644 index 000000000..b05311450 --- /dev/null +++ b/tests/test_sklearn_quantile_converter.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: Apache-2.0 + +""" +Tests scikit-learn's QuantileTransformer converter. 
+""" +import unittest +from distutils.version import StrictVersion +import numpy as np +import onnx +from sklearn.preprocessing import QuantileTransformer +from skl2onnx import convert_sklearn +from skl2onnx.common.data_types import FloatTensorType +from test_utils import dump_data_and_model + + +class TestSklearnQuantileTransformer(unittest.TestCase): + @unittest.skipIf( + StrictVersion(onnx.__version__) < StrictVersion("1.4.0"), + reason="ConstantOfShape not available", + ) + def test_quantile_transformer(self): + X = np.empty((100, 2), dtype=np.float32) + X[:, 0] = np.arange(X.shape[0]) + X[:, 1] = np.arange(X.shape[0]) * 2 + model = QuantileTransformer(n_quantiles=6).fit(X) + model_onnx = convert_sklearn( + model, "test", [("input", FloatTensorType([None, X.shape[1]]))] + ) + self.assertTrue(model_onnx is not None) + dump_data_and_model( + X.astype(np.float32), + model, + model_onnx, + basename="SklearnQuantileTransformer", + ) + + +if __name__ == "__main__": + unittest.main()