Skip to content

Shifu Plugin Mahout

lisahua edited this page Jul 31, 2014 · 9 revisions

Table of Contents

Example of using Shifu-plugin-Mahout

Neural Network

Initialize Mahout Neural Network model and PMML model with stats and transformations.
/**
 * Loads the PMML document from {@code initPmmlPath}, generates the Mahout
 * training pairs from {@code inputData}, builds a three-layer
 * {@code MultilayerPerceptron} and trains it online on every pair.
 */
protected void initModels() {
	pmml = PMMLUtils.loadPMML(initPmmlPath);
	MahoutTestDataGenerator generator = new MahoutTestDataGenerator(inputData, pmml);
	List<MahoutDataPair> trainingPairs = generator.getMahoutDataPair();

	// Create the Mahout NeuralNetwork model.
	// addLayer arguments: inputFields, isFinalLayer, squashFunction
	mlModel = new MultilayerPerceptron();
	mlModel.addLayer(30, false, "Identity");
	mlModel.addLayer(45, false, "Sigmoid");
	mlModel.addLayer(1, true, "Sigmoid");

	// Feed each generated pair to the network as a single input vector.
	for (MahoutDataPair trainingPair : trainingPairs) {
		mlModel.trainOnline(trainingPair.getVectorAsInputVector());
	}
}
Adapt Mahout model to PMML model
/**
 * Adapts the trained Mahout network onto the first model of the PMML
 * document and stores the adapted model back at index 0.
 */
protected void adaptModelToPMML() {
	Model currentModel = pmml.getModels().get(0);
	MahoutNeuralNetworkToPMMLModel adapter = new MahoutNeuralNetworkToPMMLModel();
	Model adaptedModel = adapter.adaptMLModelToPMML(mlModel, (org.dmg.pmml.NeuralNetwork) currentModel);
	pmml.getModels().set(0, adaptedModel);
}
Validate the score calculated by PMML evaluator with the score calculated by Mahout
private void evaluate(MahoutTestDataGenerator evalInput) {
	 for (Map<FieldName, String> map : evalInput.getEvaluatorInput()) {
		double[] data = evalInput.normalizeData(context);
		Assert.assertEquals(getPMMLEvaluatorResult(map),mlModel.getOutput(new DenseVector(data)).get(0), DELTA);
}

Mahout Logistic Regression Model

Initialize Mahout Logistic Regression model and PMML model with stats and transformations.
/**
 * Loads the PMML document from {@code initPmmlPath}, generates the Mahout
 * training pairs from {@code inputData} and trains an
 * {@code OnlineLogisticRegression} model on each pair.
 */
protected void initModels() {
	pmml = PMMLUtils.loadPMML(initPmmlPath);
	MahoutTestDataGenerator generator = new MahoutTestDataGenerator(inputData, pmml);
	List<MahoutDataPair> trainingPairs = generator.getMahoutDataPair();

	// Constructor arguments: numCategory, numFeatures, PriorFunction
	lrModel = new OnlineLogisticRegression(2, 30, new L1());

	for (MahoutDataPair trainingPair : trainingPairs) {
		lrModel.train(trainingPair.getActual(), trainingPair.getVector());
	}
}
Adapt Mahout model to PMML model
/**
 * Adapts the trained Mahout logistic regression model onto the first model of
 * the PMML document and stores the adapted model back at index 0.
 */
protected void adaptModelToPMML() {
	Model pmmlLR = pmml.getModels().get(0);
	// Pass lrModel: it is the model this section trains in initModels() and scores in evaluate().
	pmmlLR = new MahoutLogisticRegressionToPMMLModel().adaptMLModelToPMML(lrModel, (org.dmg.pmml.RegressionModel) pmmlLR);
	pmml.getModels().set(0, pmmlLR);
}
Validate the score calculated by PMML evaluator with the score calculated by Mahout
private void evaluate(MahoutTestDataGenerator evalInput) {
	 for (Map<FieldName, String> map : evalInput.getEvaluatorInput()) {
		double[] data = evalInput.normalizeData(context);
		Assert.assertEquals(getPMMLEvaluatorResult(map),lrModel.classifyScalar(new DenseVector(data)), DELTA);
}

More about Mahout Model

Neural Network

1. In Mahout, the bias unit (fixed value 1) is the first neuron of every layer that has a bias neuron. See the method trainByInstanceGradientDescent in the class org.apache.mahout.classifier.mlp.NeuralNetwork.

 // Excerpt (elided after the copy loop): shows how the input vector is built
 // with the bias value in slot 0 and the training values shifted up by one.
 private Matrix[] trainByInstanceGradientDescent(Vector trainingInstance) {
    // The first layer size includes the bias slot, so the real input width is one less.
    int inputDimension = this.layerSizeList.get(0) - 1;
    Vector inputInstance = new DenseVector(this.layerSizeList.get(0));
    inputInstance.set(0, 1); // bias occupies index 0 with value 1
    for (int i = 0; i < inputDimension; ++i) {
      // copy each training value to the position after the bias slot
      inputInstance.set(i + 1, trainingInstance.get(i));
    }...
}

2. The input vector is the field vector followed by the actual (target) vector, so len(inputVector) = len(fieldVector) + len(actualVector).

3. Because squashFunctionList in Mahout's NeuralNetwork class is not accessible from outside the class, we set the squashFunction (ActivationFunction) to sigmoid by default.

Currently, Mahout only supports identity and sigmoid activation functions.

  /**
   * Returns the derivative function matching the given squash function name.
   * The name is compared case-insensitively; only "Identity" and "Sigmoid"
   * are recognized.
   *
   * @param function name of the squash function
   * @return the derivative of the named function
   * @throws IllegalArgumentException if the name is neither "Identity" nor "Sigmoid"
   */
  public static DoubleFunction getDerivativeDoubleFunction(String function) {
    if (function.equalsIgnoreCase("Identity")) {
      return derivativeIdentityFunction;
    } else if (function.equalsIgnoreCase("Sigmoid")) {
      return Functions.SIGMOIDGRADIENT;
    } else {
      // any other name is rejected rather than silently defaulted
      throw new IllegalArgumentException("Function not supported.");
    }
  }

PMML Conversion

Convert Neural Network to PMML Neural Network Model

       public class MahoutNeuralNetworkToPMML  implements ModelToPMML<org.dmg.pmml.NeuralNetwork, org.apache.mahout.classifier.mlp.NeuralNetwork> {
       	public org.dmg.pmml.NeuralNetwork adaptMLModelToPMML(org.apache.mahout.classifier.mlp.NeuralNetwork nnModel, org.dmg.pmml.NeuralNetwork pmmlModel) {
       		pmmlModel = new NeuralNetworkModelIntegrator().adaptPMMLStatsModel(pmmlModel);
       		Matrix[] matrixList = nnModel.getWeightMatrices();
       		for (int layerIndex = 0; layerIndex < matrixList.length; layerIndex++) {
       		   NeuralLayer layer = new NeuralLayer();
       		   Matrix matrix = matrixList[layerIndex];
       		   // TODO since squashFunctionList in Mahout is not accessible, by default, we set ActivationFunction to sigmoid
       		   layer.setActivationFunction(ActivationFunctionType.LOGISTIC);
       		   for (int j = 0; j < rowSize; j++) {
       		      Neuron neuron = new Neuron(String.valueOf((layerIndex + 1) + "," + j));
       		      neuron.setBias(0.0);// bias of each neuron, set to 0
       		      for (int k = 1; k < columnSize; k++) {
       		         neuron.withConnections(new Connection(String.valueOf(layerIndex + "," + (k - 1)), matrix.get(j,k)));
                          }
       		      // bias neuron for each layer, set to bias=1
       		      neuron.withConnections(new Connection(PluginConstants.biasValue, matrix.get(j, 0)));
       		   }
       		   layer.withNeurons(neuron);
          }// finish build Neuron
          pmmlModel.withNeuralLayers(layer);
        }// finish build layer
        pmmlModel.withNeuralOutputs(PMMLAdapterCommonUtil.getOutputFields(pmmlModel.getMiningSchema(), matrixList.length));
        return pmmlModel;
      }
    }

Convert Logistic Regression Model to PMML Regression Model

       /**
        * Converts a trained Mahout OnlineLogisticRegression model into a PMML
        * RegressionModel by copying the first row of its beta matrix into the
        * regression table.
        */
       public class MahoutLogisticRegressionToPMMLModel implements
               ModelToPMML<org.dmg.pmml.RegressionModel, org.apache.mahout.classifier.sgd.OnlineLogisticRegression> {

           /**
            * Copies the weights of row 0 of the beta matrix into the PMML regression table.
            *
            * @param lrModel   trained Mahout logistic regression model
            * @param pmmlModel PMML regression model to populate
            * @return the PMML model returned by {@code PMMLAdapterCommonUtil.getRegressionTable}
            */
           public RegressionModel adaptMLModelToPMML(org.apache.mahout.classifier.sgd.OnlineLogisticRegression lrModel, org.dmg.pmml.RegressionModel pmmlModel) {
               Matrix matrix = lrModel.getBeta();
               // The loop walks the columns of row 0, so the bound must be the column
               // count. getNumNondefaultElements()[0] does not express that.
               int count = matrix.columnSize();
               double[] weights = new double[count];
               for (int i = 0; i < count; i++) {
                   weights[i] = matrix.get(0, i);
               }
               return PMMLAdapterCommonUtil.getRegressionTable(weights, 0, pmmlModel);
           }
       }

Convert PMML to Mahout model

Convert PMML to Mahout Logistic Regression Model
       public class MahoutLogisticRegressionFromPMML implements PMMLToModel<OnlineLogisticRegression, RegressionModel> {
         @Override
         public OnlineLogisticRegression createMLModelFromPMML(RegressionModel pmmlModel) {
           this.pmmlModel = pmmlModel;
           initNNLayer();
           setWeight();
           return mlModel;
         }
         private void initModelStructure() {
           mlModel = new OnlineLogisticRegression(2, numPredictors.size(),new L1());
         }
         private void setWeight() {
           List<NumericPredictor> nPredictors = pmmlModel.getRegressionTables().get(0).getNumericPredictors();
           for (int i = 0; i < nPredictors.size(); i++) {
             mlModel.setBeta(0, i, nPredictors.get(i).getCoefficient());
           }
         }

The conversion from PMML to a Mahout Neural Network is similar to the conversion to a Mahout Logistic Regression Model.

Clone this wiki locally