From 94efba314f8d47aa2fd2b5b3952dc280db6e417e Mon Sep 17 00:00:00 2001
From: Veronika Maurerova <veronika.maurerova@h2o.ai>
Date: Wed, 20 Sep 2023 13:58:53 +0200
Subject: [PATCH] fix mojo bug, fix make_metrics bug, clean code

---
 .../main/java/hex/tree/uplift/UpliftDRF.java  |  4 +-
 .../java/hex/tree/uplift/UpliftDRFModel.java  |  6 +-
 .../java/hex/tree/uplift/UpliftDRFTest.java   | 59 ++++++++++++++
 .../java/hex/ModelMetricsBinomialUplift.java  | 41 ----------
 .../algos/upliftdrf/UpliftDrfMojoModel.java   | 11 +++
 .../tests/testdir_misc/pyunit_make_metrics.py | 16 ++--
 h2o-r/h2o-package/R/models.R                  | 15 +++-
 .../runit_make_metrics_uplift_binomial.R      | 81 +++++++++++++------
 8 files changed, 155 insertions(+), 78 deletions(-)

diff --git a/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java b/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
index 562cdd070454..b5be40651918 100644
--- a/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
+++ b/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRF.java
@@ -52,12 +52,12 @@ public UpliftDRF(boolean startup_once) {
 
     @Override
     public boolean haveMojo() {
-        return false;
+        return true; // MOJO export is exercised by UpliftDRFTest.testEasyPredictMojo via model.toMojo()
     }
 
     @Override
     public boolean havePojo() {
-        return false;
+        return true; // NOTE(review): this patch adds no POJO test or POJO-specific code - confirm POJO scoring really works for uplift
     }
 
     @Override
diff --git a/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRFModel.java b/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRFModel.java
index 284ee134e41a..49698d4cf060 100644
--- a/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRFModel.java
+++ b/h2o-algos/src/main/java/hex/tree/uplift/UpliftDRFModel.java
@@ -48,7 +48,11 @@ public ModelCategory getModelCategory() {
 
         @Override
         public boolean isBinomialClassifier() {
-            return false;
+            return true; // NOTE(review): flipped from false - confirm every consumer of isBinomialClassifier() handles uplift output
+        }
+
+        public void setDefaultAuucThresholds(double[] defaultAuucThresholds) { // replaces the stored default AUUC thresholds
+            this._defaultAuucThresholds = defaultAuucThresholds; // keeps the caller's array by reference (no defensive copy)
         }
     }
 
diff --git a/h2o-algos/src/test/java/hex/tree/uplift/UpliftDRFTest.java b/h2o-algos/src/test/java/hex/tree/uplift/UpliftDRFTest.java
index b1a10afa934a..93b2c47ab6a3 100644
--- a/h2o-algos/src/test/java/hex/tree/uplift/UpliftDRFTest.java
+++ b/h2o-algos/src/test/java/hex/tree/uplift/UpliftDRFTest.java
@@ -3,10 +3,14 @@
 import hex.ScoreKeeper;
 import hex.genmodel.MojoModel;
 import hex.genmodel.easy.EasyPredictModelWrapper;
+import hex.genmodel.easy.RowData;
+import hex.genmodel.easy.prediction.UpliftBinomialModelPrediction;
 import hex.genmodel.utils.ArrayUtils;
 import hex.genmodel.utils.DistributionFamily;
+import org.junit.Assume;
 import org.junit.Test;
 import org.junit.runner.RunWith;
+import water.H2O;
 import water.Scope;
 import water.TestUtil;
 import water.exceptions.H2OModelBuilderIllegalArgumentException;
@@ -17,7 +21,13 @@
 import water.runner.H2ORunner;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 
 import static org.junit.Assert.*;
 
@@ -391,4 +401,53 @@ public void testMojo() {
             Scope.exit();
         }
     }
+
+    @Test
+    public void testEasyPredictMojo() throws Exception { // scores every training row through the MOJO via EasyPredictModelWrapper
+        try {
+            Scope.enter();
+            Frame train = new TestFrameBuilder()
+                    .withColNames("C0", "C1", "treatment", "conversion")
+                    .withVecTypes(Vec.T_NUM, Vec.T_NUM, Vec.T_CAT, Vec.T_CAT)
+                    .withDataForCol(0, ard(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0))
+                    .withDataForCol(1, ard(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0))
+                    .withDataForCol(2, ar("T", "C", "T", "T", "T", "C", "C", "C", "C", "C"))
+                    .withDataForCol(3, ar("Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes", "Yes"))
+                    .build();
+            train.toCategoricalCol("treatment");
+            train.toCategoricalCol("conversion");
+            Scope.track_generic(train);
+            UpliftDRFModel.UpliftDRFParameters p = new UpliftDRFModel.UpliftDRFParameters();
+            p._train = train._key;
+            p._response_column = "conversion";
+            p._treatment_column = "treatment";
+            p._ntrees = 4;
+
+            UpliftDRF udrf = new UpliftDRF(p);
+            UpliftDRFModel model = udrf.trainModel().get();
+            Scope.track_generic(model);
+            MojoModel mojo = model.toMojo();
+            EasyPredictModelWrapper wrapper = new EasyPredictModelWrapper(
+                    new EasyPredictModelWrapper.Config()
+                            .setModel(mojo)
+                            .setEnableContributions(false)
+            );
+            Frame featureFr = train.subframe(mojo.features()); // only the columns the MOJO reports as features
+            Scope.track_generic(featureFr);
+            for (int i = 0; i < featureFr.numRows(); i++) {
+                RowData row = new RowData();
+                for (String feat : featureFr.names()) {
+                    if (!featureFr.vec(feat).isNA(i)) { // NA cells are left out of the row rather than passed as a value
+                        double value = featureFr.vec(feat).at(i);
+                        row.put(feat, value);
+                    }
+                }
+                UpliftBinomialModelPrediction pred = wrapper.predictUpliftBinomial(row);
+                assertEquals(3, pred.predictions.length); // JUnit argument order is (expected, actual)
+                assertEquals(pred.predictions[0], pred.predictions[1]-pred.predictions[2], 0); // first value must equal second minus third exactly (delta 0)
+            }
+        } finally {
+            Scope.exit();
+        }
+    }
 }
diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
index 5b6c86cf1999..317858f6c060 100644
--- a/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
+++ b/h2o-core/src/main/java/hex/ModelMetricsBinomialUplift.java
@@ -70,47 +70,6 @@ public String toString() {
     protected StringBuilder appendToStringMetrics(StringBuilder sb) {
         return sb;
     }
-
-    /**
-     * Build a Binomial ModelMetrics object from target-class probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class)
-     * @param predictedProbs A Vec containing predicted probabilities
-     * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data)
-     * @param treatment A Vec containing the treatment values               
-     * @param auucType Type of default AUUC
-     * @param nbins Number of bins to calculate AUUC (-1 means default value 1000, the number has to be higher than zero)  
-     * @return ModelMetrics object
-     */
-    static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabels, Vec treatment, AUUC.AUUCType auucType, int nbins) {
-        return make(predictedProbs, actualLabels, treatment, actualLabels.domain(), auucType, nbins, null);
-    }
-
-    /**
-     * Build a Binomial ModelMetrics object from target-class probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class)
-     * @param predictedProbs A Vec containing predicted probabilities
-     * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data)
-     * @param treatment A Vec containing the treatment values               
-     * @param auucType Type of default AUUC
-     * @param nbins Number of bins to calculate AUUC (-1 means default value 1000, the number has to be higher than zero)  
-     * @param customAuucThresholds custom threshold to calculate AUUC, if is not specified, the thresholds will be calculated from prediction vector             
-     * @return ModelMetrics object
-     */
-    static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabels, Vec treatment, AUUC.AUUCType auucType, int nbins, double[] customAuucThresholds) {
-        return make(predictedProbs, actualLabels, treatment, actualLabels.domain(), auucType, nbins, customAuucThresholds);
-    }
-
-    /**
-     * Build a Binomial ModelMetrics object from predicted probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class)
-     * @param predictedProbs A Vec containing predicted probabilities
-     * @param actualLabels A Vec containing the actual labels (can be for fewer labels than what's in domain, since the predictions can be for a small subset of the data)
-     * @param treatment A Vec containing the treatment values               
-     * @param domain The two class labels (domain[0] is the non-target class, domain[1] is the target class, for which probabilities are given)
-     * @param auucType Type of default AUUC
-     * @param auucNbins Number of bins to calculate AUUC (-1 means default value 1000, the number has to be higher than zero)                
-     * @return ModelMetrics object
-     */
-    static public ModelMetricsBinomialUplift make(Vec predictedProbs, Vec actualLabels, Vec treatment, String[] domain, AUUC.AUUCType auucType, int auucNbins) {
-        return make(predictedProbs, actualLabels, treatment, domain, auucType, auucNbins, null);
-    }
     
     /**
      * Build a Binomial ModelMetrics object from predicted probabilities, from actual labels, and a given domain for both labels (and domain[1] is the target class)
diff --git a/h2o-genmodel/src/main/java/hex/genmodel/algos/upliftdrf/UpliftDrfMojoModel.java b/h2o-genmodel/src/main/java/hex/genmodel/algos/upliftdrf/UpliftDrfMojoModel.java
index 4b1cc72449bc..3ba78cb48859 100644
--- a/h2o-genmodel/src/main/java/hex/genmodel/algos/upliftdrf/UpliftDrfMojoModel.java
+++ b/h2o-genmodel/src/main/java/hex/genmodel/algos/upliftdrf/UpliftDrfMojoModel.java
@@ -1,5 +1,6 @@
 package hex.genmodel.algos.upliftdrf;
 
+import hex.ModelCategory;
 import hex.genmodel.algos.tree.SharedTreeMojoModel;
 
 public class UpliftDrfMojoModel extends SharedTreeMojoModel {
@@ -33,4 +34,14 @@ public double getInitF() {
     public double[] getThresholds() {
         return _thresholds;
     }
+
+    @Override
+    public int getPredsSize() {
+        return 3; // UpliftDRFTest.testEasyPredictMojo expects exactly three prediction values per row
+    }
+
+    @Override
+    public int getPredsSize(ModelCategory mc) {
+        return getPredsSize(); // the category argument is ignored; the size is the same for any value passed in
+    }
 }
diff --git a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
index 96cbe64e7a5f..94cf10a95fdf 100644
--- a/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
+++ b/h2o-py/tests/testdir_misc/pyunit_make_metrics.py
@@ -244,20 +244,22 @@ def pyunit_make_metrics_uplift():
     print("Performance AUUC with no custom thresholds but change nbins parameter: {}".format(m6.auuc()))
     print("thresholds: {}".format(m6.thresholds()))
 
+    tol = 1e-5
+
     # default model auuc is calculated from train data, default thresholds are from validation data
-    assert abs(model.auuc() - m0.auuc()) > 1e-5 
+    assert abs(model.auuc() - m0.auuc()) > tol 
     # model performance calculates new thresholds but from the same data with the same number of bins, so AUUCs are same
-    assert abs(m0.auuc() - m1.auuc()) < 1e-5
+    assert abs(m0.auuc() - m1.auuc()) < tol
     # make method calculates new thresholds but from the same data with the same number of bins, so AUUCs are same
-    assert abs(m1.auuc() - m2.auuc()) < 1e-5
+    assert abs(m1.auuc() - m2.auuc()) < tol
     # if we use thresholds from performance metric and use it as custom, it makes the same metrics
-    assert abs(m1.auuc() - m3.auuc()) < 1e-5
+    assert abs(m1.auuc() - m3.auuc()) < tol
     # make methods with different nbins parameter changes thresholds and AUUC
-    assert abs(m3.auuc() - m5.auuc()) > 1e-5
+    assert abs(m3.auuc() - m5.auuc()) > tol
     # performance methods with different nbins parameter changes thresholds and AUUC
-    assert abs(m3.auuc() - m6.auuc()) > 1e-5
+    assert abs(m3.auuc() - m6.auuc()) > tol
     # make and performance method with the same nbins parameter and the same data calculates the same thresholds
-    assert abs(m5.auuc() - m6.auuc()) < 1e-5
+    assert abs(m5.auuc() - m6.auuc()) < tol
     
     print("===========================")
 
diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R
index 0f67b1378443..f250950aea5d 100755
--- a/h2o-r/h2o-package/R/models.R
+++ b/h2o-r/h2o-package/R/models.R
@@ -1039,7 +1039,7 @@ h2o.feature_frequencies <- feature_frequencies.H2OModel
 #' h2o.performance(model = prostate_gbm_balanced, train = TRUE)
 #' }
 #' @export
-h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=FALSE, data=NULL, auc_type="NONE", custom_auuc_thresholds=NULL) {
+h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=FALSE, data=NULL, auc_type="NONE", auuc_type="NONE", auuc_nbins=-1) {
 
   # data is now deprecated and the new arg name is newdata
   if (!is.null(data)) {
@@ -1061,6 +1061,9 @@ h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=
   if( missingNewdata && auc_type != "NONE") {
     print("WARNING: The `auc_type` parameter is set but it is not used because the `newdata` parameter is NULL.")
   }
+  if( missingNewdata && auuc_type != "NONE") {
+    print("WARNING: The `auuc_type` parameter is set but it is not used because the `newdata` parameter is NULL.")
+  }  
   if( !missingNewdata ) {
     if (!is.null(model@parameters$y)  &&  !(model@parameters$y %in% names(newdata))) {
       print("WARNING: Model metrics cannot be calculated and metric_json is empty due to the absence of the response column in your dataset.")
@@ -1075,9 +1078,13 @@ h2o.performance <- function(model, newdata=NULL, train=FALSE, valid=FALSE, xval=
     } else if(!is.null(model@parameters$auc_type) && model@parameters$auc_type != "NONE"){
         parms[["auc_type"]] <- model@parameters$auc_type
     }
-    if(!is.null(custom_auuc_thresholds)){
-        parms[["custom_auuc_thresholds"]] <- paste("[", paste(custom_auuc_thresholds, collapse = ", "),"]")
-
+    if(auuc_type != "NONE"){ # an explicit auuc_type argument wins over the value stored in the model parameters
+        parms[["auuc_type"]] <- auuc_type
+    } else if(!is.null(model@parameters$auuc_type) && model@parameters$auuc_type != "NONE"){
+        parms[["auuc_type"]] <- model@parameters$auuc_type # fall back to the type the model was trained with
+    }
+    if(auuc_nbins > 0){
+        parms[["auuc_nbins"]] <- auuc_nbins
     }  
     res <- .h2o.__remoteSend(method = "POST", .h2o.__MODEL_METRICS(model@model_id, newdata.id), .params = parms)
 
diff --git a/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R b/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
index 0fa7e3cd22b0..04d42667a222 100644
--- a/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
+++ b/h2o-r/tests/testdir_misc/runit_make_metrics_uplift_binomial.R
@@ -5,79 +5,114 @@ test.make_metrics_uplift_binomial <- function() {
     response <- "outcome"
     treatment <- "treatment"
     train <- h2o.importFile(locate("smalldata/uplift/upliftml_train.csv"))
+    valid <- h2o.importFile(locate("smalldata/uplift/upliftml_test.csv"))
     train$treatment <- as.factor(train$treatment)
     train$outcome <- as.factor(train$outcome)
+    valid$treatment <- as.factor(valid$treatment)
+    valid$outcome <- as.factor(valid$outcome)
 
     predictors <- sprintf("feature_%s",seq(0:11))
     
     
     model <- h2o.upliftRandomForest(training_frame=train,
+                                    validation_frame=valid,
                                     x=predictors,
                                     y=response,
-                                    ntrees=5,
-                                    max_depth=5,
                                     treatment_column=treatment,
-                                    min_rows=10,
-                                    nbins=100,
-                                    seed=1234)
+                                    seed=42,
+                                    auuc_nbins=20,
+                                    score_each_iteration=TRUE,
+                                    ntrees=3)
     print(model)
 
-    pred <- h2o.assign(h2o.predict(model,train)[,1],"pred")
-    actual <- h2o.assign(train[,response],"act")
-    treat <- h2o.assign(train[,treatment],"treatment")
-    print(treat)
+    h2oPred <- as.data.frame(h2o.predict(model,valid)[,1])
+    
+    pred <- h2o.assign(h2o.predict(model,valid)[,1], "pred")
+    actual <- h2o.assign(valid[,response], "act")
+    treat <- h2o.assign(valid[,treatment], "treatment")
     
     thresholds <- model@model$default_auuc_thresholds
+    
+    m0 <- h2o.performance(model, valid=TRUE)
+    thresholds0 <- m0@metrics$thresholds$thresholds
+
+    m1 <- h2o.make_metrics(pred, actual, treatment=treat, custom_auuc_thresholds=thresholds)
+    thresholds1 <- m1@metrics$thresholds$thresholds
 
-    m0 <- h2o.make_metrics(pred, actual, treatment=treat, custom_auuc_thresholds=thresholds)
-    print(m0)
+    m2 <- h2o.performance(model, valid, auuc_nbins=20)
+    thresholds2 <- m2@metrics$thresholds$thresholds
+
+    tol <- 1e-10
+    ltol <- 1e-1 # There are few differences in prediction R vs. Java scoring, so the results are not the same but similar
+    
+    # thresholds should be the same
+    expect_equal(thresholds, thresholds0, tolerance=tol)
+    expect_equal(thresholds0, thresholds1, tolerance=ltol)
+    expect_equal(thresholds0, thresholds2, tolerance=tol)
     
-    m1 <- h2o.performance(model, train, custom_auuc_thresholds=thresholds)
-    print(m1)
     auuc0 <- h2o.auuc(m0)
     auuc1 <- h2o.auuc(m1)
+    auuc2 <- h2o.auuc(m2)
+    
+    expect_equal(auuc0, auuc1, tolerance=ltol)
+    expect_equal(auuc0, auuc2, tolerance=tol)
  
     auuc_table0 <- h2o.auuc_table(m0)
     auuc_table1 <- h2o.auuc_table(m1)
+    auuc_table2 <- h2o.auuc_table(m2)
     
     expect_true(is.data.frame(auuc_table0))
     expect_true(is.data.frame(auuc_table1))
+    expect_true(is.data.frame(auuc_table2))
     
-    expect_equal(auuc0, auuc1)
-    expect_equal(auuc_table0, auuc_table1)
+    expect_equal(auuc_table0, auuc_table1, tolerance=ltol)
+    expect_equal(auuc_table0, auuc_table2, tolerance=tol)
     
     thr_table0 <- h2o.thresholds_and_metric_scores(m0)
     thr_table1 <- h2o.thresholds_and_metric_scores(m1)
+    thr_table2 <- h2o.thresholds_and_metric_scores(m2)
+    
+    expect_equal(thr_table0, thr_table1, tolerance=ltol)
+    expect_equal(thr_table0, thr_table2, tolerance=tol)
     
-    expect_equal(thr_table0, thr_table1)
-   
     qini0 <- h2o.qini(m0)
     qini1 <- h2o.qini(m1)
+    qini2 <- h2o.qini(m2)
     
-    expect_equal(qini0, qini1)
+    expect_equal(qini0, qini1, tolerance=ltol)
+    expect_equal(qini0, qini2, tolerance=tol)
  
     aecu_table0 <- h2o.aecu_table(m0)
     aecu_table1 <- h2o.aecu_table(m1)
+    aecu_table2 <- h2o.aecu_table(m2)
     
     expect_true(is.data.frame(aecu_table0))
     expect_true(is.data.frame(aecu_table1))
- 
-    expect_equal(aecu_table0, aecu_table1)
+    expect_true(is.data.frame(aecu_table2))
+    
+    expect_equal(aecu_table0, aecu_table1, tolerance=ltol)
+    expect_equal(aecu_table0, aecu_table2, tolerance=tol)
 
     ate0 <- h2o.ate(m0)
     ate1 <- h2o.ate(m1)
+    ate2 <- h2o.ate(m2)
 
-    expect_equal(ate0, ate1)
+    expect_equal(ate0, ate1, tolerance=ltol)
+    expect_equal(ate0, ate2, tolerance=tol)
 
     att0 <- h2o.att(m0)
     att1 <- h2o.att(m1)
+    att2 <- h2o.att(m2)
 
-    expect_equal(att0, att1)
+    expect_equal(att0, att1, tolerance=ltol)
+    expect_equal(att0, att2, tolerance=tol)
 
     atc0 <- h2o.atc(m0)
     atc1 <- h2o.atc(m1)
+    atc2 <- h2o.atc(m2)
 
-    expect_equal(atc0, atc1)
+    expect_equal(atc0, atc1, tolerance=ltol)
+    expect_equal(atc0, atc2, tolerance=tol)
 }
 
 doSuite("Check making uplift binomial model metrics.", makeSuite(