-
Notifications
You must be signed in to change notification settings - Fork 108
Shifu Plugin Mahout
lisahua edited this page Jul 31, 2014
·
9 revisions
/**
 * Loads the initial PMML document, builds a three-layer Mahout
 * {@code MultilayerPerceptron} and trains it online on the generated data pairs.
 */
protected void initModels() {
    pmml = PMMLUtils.loadPMML(initPmmlPath);
    MahoutTestDataGenerator generator = new MahoutTestDataGenerator(inputData, pmml);
    List<MahoutDataPair> trainingPairs = generator.getMahoutDataPair();

    // Create the Mahout neural network model.
    // addLayer arguments: inputFields, isFinalLayer, squashFunction.
    mlModel = new MultilayerPerceptron();
    mlModel.addLayer(30, false, "Identity");
    mlModel.addLayer(45, false, "Sigmoid");
    mlModel.addLayer(1, true, "Sigmoid");

    // Feed every generated pair to the network, one instance at a time.
    for (MahoutDataPair trainingPair : trainingPairs) {
        mlModel.trainOnline(trainingPair.getVectorAsInputVector());
    }
}
/**
 * Converts the trained Mahout network into PMML and stores the result back
 * as the first model of the loaded PMML document.
 */
protected void adaptModelToPMML() {
    org.dmg.pmml.NeuralNetwork template = (org.dmg.pmml.NeuralNetwork) pmml.getModels().get(0);
    Model adapted = new MahoutNeuralNetworkToPMMLModel().adaptMLModelToPMML(mlModel, template);
    pmml.getModels().set(0, adapted);
}
private void evaluate(MahoutTestDataGenerator evalInput) {
for (Map<FieldName, String> map : evalInput.getEvaluatorInput()) {
double[] data = evalInput.normalizeData(context);
Assert.assertEquals(getPMMLEvaluatorResult(map),mlModel.getOutput(new DenseVector(data)).get(0), DELTA);
}
/**
 * Loads the initial PMML document and trains a Mahout
 * {@code OnlineLogisticRegression} model on the generated data pairs.
 */
protected void initModels() {
    pmml = PMMLUtils.loadPMML(initPmmlPath);
    MahoutTestDataGenerator generator = new MahoutTestDataGenerator(inputData, pmml);
    List<MahoutDataPair> trainingPairs = generator.getMahoutDataPair();

    // Constructor arguments: numCategory, numFeatures, PriorFunction.
    lrModel = new OnlineLogisticRegression(2, 30, new L1());

    for (MahoutDataPair trainingPair : trainingPairs) {
        lrModel.train(trainingPair.getActual(), trainingPair.getVector());
    }
}
/**
 * Converts the trained Mahout logistic regression model into PMML and stores the
 * result back as the first model of the loaded PMML document.
 */
protected void adaptModelToPMML() {
    Model pmmlLR = pmml.getModels().get(0);
    // Pass lrModel: it is the model this test trains in initModels() and scores in evaluate().
    pmmlLR = new MahoutLogisticRegressionToPMMLModel().adaptMLModelToPMML(lrModel, (org.dmg.pmml.RegressionModel) pmmlLR);
    pmml.getModels().set(0, pmmlLR);
}
private void evaluate(MahoutTestDataGenerator evalInput) {
for (Map<FieldName, String> map : evalInput.getEvaluatorInput()) {
double[] data = evalInput.normalizeData(context);
Assert.assertEquals(getPMMLEvaluatorResult(map),lrModel.classifyScalar(new DenseVector(data)), DELTA);
}
1. In Mahout, the bias (value 1) is the first neuron of every layer that has a bias neuron. See the function `trainByInstanceGradientDescent` in the class `org.apache.mahout.classifier.mlp.NeuralNetwork`.
// Excerpt: builds the input-layer vector for one training instance; the rest of the body is elided.
private Matrix[] trainByInstanceGradientDescent(Vector trainingInstance) {
// The first layer size includes the bias slot, so the number of real inputs is one less.
int inputDimension = this.layerSizeList.get(0) - 1;
Vector inputInstance = new DenseVector(this.layerSizeList.get(0));
inputInstance.set(0, 1); // add bias
// Copy the input values shifted by one position, leaving index 0 for the bias.
for (int i = 0; i < inputDimension; ++i) {
inputInstance.set(i + 1, trainingInstance.get(i));
}...
}
2. The input vector is the field vector followed by the actual (target) vector, so `len(inputVector) = len(fieldVector) + len(actualVector)`.
3. Since `squashFunctionList` in the Mahout `NeuralNetwork` class is not accessible from outside, we set the squash function (the PMML ActivationFunction) to `sigmoid` by default.
Currently, Mahout only supports the `identity` and `sigmoid` activation functions.
/**
 * Looks up the derivative function for a squash function name (case-insensitive).
 *
 * @param function name of the squash function; "Identity" and "Sigmoid" are recognized
 * @return the derivative of the named function
 * @throws IllegalArgumentException if the name is neither "Identity" nor "Sigmoid"
 */
public static DoubleFunction getDerivativeDoubleFunction(String function) {
    boolean identityRequested = function.equalsIgnoreCase("Identity");
    if (identityRequested) {
        return derivativeIdentityFunction;
    }
    boolean sigmoidRequested = function.equalsIgnoreCase("Sigmoid");
    if (sigmoidRequested) {
        return Functions.SIGMOIDGRADIENT;
    }
    // Any other name is rejected.
    throw new IllegalArgumentException("Function not supported.");
}
public class MahoutNeuralNetworkToPMML implements ModelToPMML<org.dmg.pmml.NeuralNetwork, org.apache.mahout.classifier.mlp.NeuralNetwork> {
public org.dmg.pmml.NeuralNetwork adaptMLModelToPMML(org.apache.mahout.classifier.mlp.NeuralNetwork nnModel, org.dmg.pmml.NeuralNetwork pmmlModel) {
pmmlModel = new NeuralNetworkModelIntegrator().adaptPMMLStatsModel(pmmlModel);
Matrix[] matrixList = nnModel.getWeightMatrices();
for (int layerIndex = 0; layerIndex < matrixList.length; layerIndex++) {
NeuralLayer layer = new NeuralLayer();
Matrix matrix = matrixList[layerIndex];
// TODO since squashFunctionList in Mahout is not accessible, by default, we set ActivationFunction to sigmoid
layer.setActivationFunction(ActivationFunctionType.LOGISTIC);
for (int j = 0; j < rowSize; j++) {
Neuron neuron = new Neuron(String.valueOf((layerIndex + 1) + "," + j));
neuron.setBias(0.0);// bias of each neuron, set to 0
for (int k = 1; k < columnSize; k++) {
neuron.withConnections(new Connection(String.valueOf(layerIndex + "," + (k - 1)), matrix.get(j,k)));
}
// bias neuron for each layer, set to bias=1
neuron.withConnections(new Connection(PluginConstants.biasValue, matrix.get(j, 0)));
}
layer.withNeurons(neuron);
}// finish build Neuron
pmmlModel.withNeuralLayers(layer);
}// finish build layer
pmmlModel.withNeuralOutputs(PMMLAdapterCommonUtil.getOutputFields(pmmlModel.getMiningSchema(), matrixList.length));
return pmmlModel;
}
}
/**
 * Converts a trained Mahout {@code OnlineLogisticRegression} model into a PMML
 * {@code RegressionModel} by exporting the first row of its beta matrix as coefficients.
 */
public class MahoutLogisticRegressionToPMMLModel implements
        ModelToPMML<org.dmg.pmml.RegressionModel, org.apache.mahout.classifier.sgd.OnlineLogisticRegression> {

    /**
     * Copies the coefficients of the Mahout model into the PMML regression table.
     *
     * @param lrModel   trained Mahout logistic regression model
     * @param pmmlModel PMML regression model to populate
     * @return the result of {@code PMMLAdapterCommonUtil.getRegressionTable}
     */
    public RegressionModel adaptMLModelToPMML(org.apache.mahout.classifier.sgd.OnlineLogisticRegression lrModel, org.dmg.pmml.RegressionModel pmmlModel) {
        Matrix matrix = lrModel.getBeta();
        // The loop below walks the columns of row 0, so the count must be the column
        // count of the matrix, not its row count.
        int count = matrix.columnSize();
        double[] weights = new double[count];
        for (int i = 0; i < count; i++) {
            weights[i] = matrix.get(0, i);
        }
        return PMMLAdapterCommonUtil.getRegressionTable(weights, 0, pmmlModel);
    }
}
public class MahoutLogisticRegressionFromPMML implements PMMLToModel<OnlineLogisticRegression, RegressionModel> {
@Override
public OnlineLogisticRegression createMLModelFromPMML(RegressionModel pmmlModel) {
this.pmmlModel = pmmlModel;
initNNLayer();
setWeight();
return mlModel;
}
private void initModelStructure() {
mlModel = new OnlineLogisticRegression(2, numPredictors.size(),new L1());
}
private void setWeight() {
List<NumericPredictor> nPredictors = pmmlModel.getRegressionTables().get(0).getNumericPredictors();
for (int i = 0; i < nPredictors.size(); i++) {
mlModel.setBeta(0, i, nPredictors.get(i).getCoefficient());
}
}
The conversion from PMML to a Mahout Neural Network is similar to the conversion to a Mahout Logistic Regression model.