Skip to content

Commit

Permalink
Added 'Memorizer' and 'Recaller' transformation types
Browse files Browse the repository at this point in the history
  • Loading branch information
vruusmann committed Sep 19, 2023
1 parent 981536c commit 69fd0f7
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 0 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
],
packages = [
"sklearn2pmml",
"sklearn2pmml.cross_reference",
"sklearn2pmml.decoration",
"sklearn2pmml.ensemble",
"sklearn2pmml.expression",
Expand Down
42 changes: 42 additions & 0 deletions sklearn2pmml/cross_reference/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from sklearn.base import BaseEstimator, TransformerMixin

import numpy

class _BaseMemoryManager(BaseEstimator, TransformerMixin):
    """Common base for transformers that read from or write to a shared ``memory``.

    Parameters
    ----------
    memory
        Object accessed by subclasses as ``memory[name]``. It is stored by
        reference, not copied.
    names : list
        Keys under which columns are stored in, or fetched from, ``memory``.

    Raises
    ------
    TypeError
        If ``names`` is not a ``list``.
    """

    def __init__(self, memory, names):
        self.memory = memory
        if not isinstance(names, list):
            # Same exception type as before; the message names the offending type.
            raise TypeError("names must be a list, got {0}".format(type(names).__name__))
        self.names = names

class Memorizer(_BaseMemoryManager):
    """Transformer that copies each column of ``X`` into ``memory`` under a name.

    Column ``idx`` of ``X`` is stored as ``memory[names[idx]]``. The transformed
    output carries no columns.
    """

    def __init__(self, memory, names):
        super(Memorizer, self).__init__(memory, names)

    def fit(self, X, y = None):
        """Check that ``X`` has exactly one column per entry in ``names``.

        Returns ``self``.

        Raises
        ------
        ValueError
            If ``X.shape[1]`` differs from ``len(self.names)``.
        """
        n_columns = X.shape[1]
        n_names = len(self.names)
        if n_columns != n_names:
            raise ValueError("Expected {0} column(s), one per name, got {1}".format(n_names, n_columns))
        return self

    def transform(self, X):
        """Store a copy of every named column of ``X`` and return an empty array.

        ``X`` is indexed as ``X[:, idx]``, so it must support that form of
        two-dimensional indexing. The result has shape ``(X.shape[0], 0)``
        and ``int`` dtype.
        """
        for idx, name in enumerate(self.names):
            # Copy so that later changes to X do not leak into the memory.
            self.memory[name] = X[:, idx].copy()
        return numpy.empty(shape = (X.shape[0], 0), dtype = int)

class Recaller(_BaseMemoryManager):
    """Transformer that rebuilds a column matrix from values held in ``memory``.

    The input ``X`` is ignored; the output is assembled solely from
    ``memory[name]`` for each entry of ``names``, in order.
    """

    def __init__(self, memory, names):
        super(Recaller, self).__init__(memory, names)

    def fit(self, X, y = None):
        """No-op fit; returns ``self``."""
        return self

    def transform(self, X):
        """Return the named memory entries as the columns of one array.

        Each ``memory[name]`` is copied via its ``copy()`` method. The copies
        are stacked with ``numpy.asarray`` and transposed, so that every name
        becomes one column of the result.
        """
        columns = [self.memory[name].copy() for name in self.names]
        return numpy.asarray(columns).T
56 changes: 56 additions & 0 deletions sklearn2pmml/cross_reference/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from pandas import DataFrame
from sklearn2pmml.cross_reference import Memorizer, Recaller
from unittest import TestCase

import numpy

class MemorizerTest(TestCase):
    """Checks Memorizer against a dict-backed and a DataFrame-backed memory."""

    def test_fit_transform(self):
        # Scenario 1: a plain dict as the memory, one integer column.
        store = {}
        self.assertEqual(0, len(store))
        transformer = Memorizer(store, ["int"])
        data = numpy.asarray([[-1], [1]])
        transformed = transformer.fit_transform(data)
        self.assertEqual((2, 0), transformed.shape)
        self.assertEqual(1, len(store))
        self.assertEqual([-1, 1], store["int"].tolist())

        # Scenario 2: an empty DataFrame as the memory, three columns.
        frame = DataFrame()
        self.assertEqual((0, 0), frame.shape)
        transformer = Memorizer(frame, ["int", "float", "str"])
        data = numpy.asarray([[1, 1.0, "one"], [2, 2.0, "two"], [3, 3.0, "three"]])
        transformed = transformer.fit_transform(data)
        self.assertEqual((3, 0), transformed.shape)
        self.assertEqual((3, 3), frame.shape)
        # The memorized values come back as strings, hence the explicit casts.
        int_column = frame["int"]
        self.assertEqual(["1", "2", "3"], int_column.tolist())
        self.assertEqual([1, 2, 3], int_column.astype(int).tolist())
        float_column = frame["float"]
        self.assertEqual([str(1.0), str(2.0), str(3.0)], float_column.tolist())
        self.assertEqual([1.0, 2.0, 3.0], float_column.astype(float).tolist())
        self.assertEqual(["one", "two", "three"], frame["str"].tolist())

class RecallerTest(TestCase):
    """Checks Recaller against a dict-backed and a DataFrame-backed memory."""

    def test_fit_transform(self):
        # The input has a different shape from every expected output below;
        # the result dimensions follow the memory contents instead.
        dummy = numpy.empty((100, 5), dtype = str)

        # Scenario 1: a plain dict holding a Python list.
        store = {
            "int": [-1, 1]
        }
        recalled = Recaller(store, ["int"]).fit_transform(dummy)
        self.assertEqual((2, 1), recalled.shape)
        self.assertEqual([-1, 1], recalled[:, 0].tolist())

        # Scenario 2: a DataFrame with three differently typed columns.
        rows = [[1, 1.0, "one"], [2, 2.0, "two"], [3, 3.0, "three"]]
        frame = DataFrame(rows, columns = ["int", "float", "str"])
        self.assertEqual((3, 3), frame.shape)

        # Recall a single column.
        recalled = Recaller(frame, ["int"]).fit_transform(dummy)
        self.assertEqual((3, 1), recalled.shape)
        self.assertEqual([1, 2, 3], recalled[:, 0].tolist())

        # Recall all three columns in order.
        recalled = Recaller(frame, ["int", "float", "str"]).fit_transform(dummy)
        self.assertEqual((3, 3), recalled.shape)
        self.assertEqual([1, 2, 3], recalled[:, 0].tolist())
        self.assertEqual([1.0, 2.0, 3.0], recalled[:, 1].tolist())
        self.assertEqual(["one", "two", "three"], recalled[:, 2].tolist())

0 comments on commit 69fd0f7

Please sign in to comment.