From 88aec1719af85ee0bcf0e9d3831f1a617ce1ff12 Mon Sep 17 00:00:00 2001
From: Villu Ruusmann
Date: Fri, 29 Dec 2023 09:13:29 +0200
Subject: [PATCH] Updated test resources

After upgrading from XGBoost 1.7(.6) to 2.0(.1), it seems that XGBoost
estimators no longer require setting the 'num_workers' and 'tracker_conf'
parameters.
---
 pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala   | 5 ++---
 pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala    | 5 ++---
 pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala | 5 ++---
 pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala    | 5 ++---
 4 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala
index a0ba697d..09c551f2 100644
--- a/pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala
+++ b/pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala
@@ -1,6 +1,6 @@
 import java.io.File
 
-import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostClassifier}
+import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
 import org.apache.spark.ml.Pipeline
 import org.apache.spark.ml.feature._
 import org.apache.spark.ml.linalg.Vector
@@ -25,8 +25,7 @@ val assembler = new VectorAssembler().setInputCols(ohe.getOutputCols ++ cont_col
 
 val sparse2dense = new SparseToDenseTransformer().setInputCol(assembler.getOutputCol).setOutputCol("denseFeatureVec")
 
-val trackerConf = TrackerConf(0, "scala")
-val classifier = new XGBoostClassifier(Map("objective" -> "binary:logistic", "num_round" -> 101, "tracker_conf" -> trackerConf)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(sparse2dense.getOutputCol)
+val classifier = new XGBoostClassifier(Map("objective" -> "binary:logistic", "num_round" -> 101)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(sparse2dense.getOutputCol)
 
 val pipeline = new Pipeline().setStages(Array(labelIndexer, indexer, ohe, assembler, sparse2dense, classifier))
 val pipelineModel = pipeline.fit(df)
diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala
index 5ce7df79..756facb1 100644
--- a/pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala
+++ b/pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala
@@ -1,6 +1,6 @@
 import java.io.File
 
-import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostRegressor}
+import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor
 import org.apache.spark.ml.Pipeline
 import org.apache.spark.ml.feature._
 import org.apache.spark.sql.types.{FloatType, StringType}
@@ -21,8 +21,7 @@ val assembler = new VectorAssembler().setInputCols(ohe.getOutputCols ++ cont_col
 
 val sparse2dense = new SparseToDenseTransformer().setInputCol(assembler.getOutputCol).setOutputCol("denseFeatureVec")
 
-val trackerConf = TrackerConf(0, "scala")
-val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101, "num_workers" -> 1, "tracker_conf" -> trackerConf)).setLabelCol("mpg").setFeaturesCol(sparse2dense.getOutputCol)
+val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101)).setLabelCol("mpg").setFeaturesCol(sparse2dense.getOutputCol)
 
 val pipeline = new Pipeline().setStages(Array(indexer, ohe, assembler, sparse2dense, regressor))
 val pipelineModel = pipeline.fit(df)
diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala
index a878d3d8..8156a0d8 100644
--- a/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala
+++ b/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala
@@ -1,6 +1,6 @@
 import java.io.File
 
-import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostRegressor}
+import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor
 import org.apache.spark.ml.Pipeline
 import org.apache.spark.ml.feature._
 import org.apache.spark.sql.types.FloatType
@@ -16,8 +16,7 @@ val cont_cols = Array("CRIM", "ZN", "INDUS", "NOX", "RM", "AGE", "DIS", "PTRATIO
 val assembler = new VectorAssembler().setInputCols(cat_cols ++ cont_cols).setOutputCol("featureVector")
 val indexer = new VectorIndexer().setInputCol(assembler.getOutputCol).setOutputCol("catFeatureVector")
 
-val trackerConf = TrackerConf(0, "scala")
-val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101, "num_workers" -> 1, "tracker_conf" -> trackerConf)).setMissing(-1).setLabelCol("MEDV").setFeaturesCol(indexer.getOutputCol)
+val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101)).setMissing(-1).setLabelCol("MEDV").setFeaturesCol(indexer.getOutputCol)
 
 val pipeline = new Pipeline().setStages(Array(assembler, indexer, regressor))
 val pipelineModel = pipeline.fit(df)
diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala
index 7771b385..4a088e54 100644
--- a/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala
+++ b/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala
@@ -1,6 +1,6 @@
 import java.io.File
 
-import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostClassifier}
+import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
 import org.apache.spark.ml.Pipeline
 import org.apache.spark.ml.feature._
 import org.apache.spark.ml.linalg.Vector
@@ -22,8 +22,7 @@ val labelIndexerModel = labelIndexer.fit(df)
 
 val assembler = new VectorAssembler().setInputCols(Array("Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width")).setOutputCol("featureVector")
 
-val trackerConf = TrackerConf(0, "scala")
-val classifier = new XGBoostClassifier(Map("objective" -> "multi:softprob", "num_class" -> 3, "num_round" -> 17, "tracker_conf" -> trackerConf)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(assembler.getOutputCol)
+val classifier = new XGBoostClassifier(Map("objective" -> "multi:softprob", "num_class" -> 3)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(assembler.getOutputCol)
 
 val pipeline = new Pipeline().setStages(Array(labelIndexer, assembler, classifier))
 val pipelineModel = pipeline.fit(df)
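
For context, a minimal sketch of the estimator configuration before and after the upgrade, not taken from the test resources above: the training DataFrame `df` and the "label"/"features" column names are assumed for illustration.

import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier

// XGBoost4J-Spark 1.7.x (old style): the test resources passed tracker-related
// entries in the parameter map, e.g.
//   import ml.dmlc.xgboost4j.scala.spark.TrackerConf
//   val trackerConf = TrackerConf(0, "scala")
//   new XGBoostClassifier(Map("objective" -> "binary:logistic", "num_round" -> 101,
//     "num_workers" -> 1, "tracker_conf" -> trackerConf))

// XGBoost4J-Spark 2.0.x (new style): per this patch, the parameter map can be
// limited to learning parameters; tracker setup is left to the library defaults.
val classifier = new XGBoostClassifier(Map("objective" -> "binary:logistic", "num_round" -> 101))
  .setLabelCol("label")       // assumed label column name
  .setFeaturesCol("features") // assumed features column name

// Training proceeds as before, assuming a prepared DataFrame `df`:
// val model = classifier.fit(df)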