Skip to content

Commit

Permalink
Updated test resources
Browse files — browse the repository at this point in the history
After upgrading from XGBoost 1.7(.6) to 2.0(.1), XGBoost estimators no longer
require setting the 'num_workers' and 'tracker_conf' parameters.
(NOTE, review: the XGBoostIris.scala change additionally drops the
"num_round" -> 17 parameter, so that estimator falls back to the library's
default number of boosting rounds — presumably intentional, but it is a
behavior change beyond what this message describes; confirm.)
vruusmann committed Dec 29, 2023
1 parent 4225cce commit 88aec17
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 12 deletions.
5 changes: 2 additions & 3 deletions pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import java.io.File

import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostClassifier}
import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature._
import org.apache.spark.ml.linalg.Vector
Expand All @@ -25,8 +25,7 @@ val assembler = new VectorAssembler().setInputCols(ohe.getOutputCols ++ cont_col

val sparse2dense = new SparseToDenseTransformer().setInputCol(assembler.getOutputCol).setOutputCol("denseFeatureVec")

val trackerConf = TrackerConf(0, "scala")
val classifier = new XGBoostClassifier(Map("objective" -> "binary:logistic", "num_round" -> 101, "tracker_conf" -> trackerConf)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(sparse2dense.getOutputCol)
val classifier = new XGBoostClassifier(Map("objective" -> "binary:logistic", "num_round" -> 101)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(sparse2dense.getOutputCol)

val pipeline = new Pipeline().setStages(Array(labelIndexer, indexer, ohe, assembler, sparse2dense, classifier))
val pipelineModel = pipeline.fit(df)
Expand Down
5 changes: 2 additions & 3 deletions pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import java.io.File

import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostRegressor}
import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature._
import org.apache.spark.sql.types.{FloatType, StringType}
Expand All @@ -21,8 +21,7 @@ val assembler = new VectorAssembler().setInputCols(ohe.getOutputCols ++ cont_col

val sparse2dense = new SparseToDenseTransformer().setInputCol(assembler.getOutputCol).setOutputCol("denseFeatureVec")

val trackerConf = TrackerConf(0, "scala")
val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101, "num_workers" -> 1, "tracker_conf" -> trackerConf)).setLabelCol("mpg").setFeaturesCol(sparse2dense.getOutputCol)
val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101)).setLabelCol("mpg").setFeaturesCol(sparse2dense.getOutputCol)

val pipeline = new Pipeline().setStages(Array(indexer, ohe, assembler, sparse2dense, regressor))
val pipelineModel = pipeline.fit(df)
Expand Down
5 changes: 2 additions & 3 deletions pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import java.io.File

import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostRegressor}
import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature._
import org.apache.spark.sql.types.FloatType
Expand All @@ -16,8 +16,7 @@ val cont_cols = Array("CRIM", "ZN", "INDUS", "NOX", "RM", "AGE", "DIS", "PTRATIO
val assembler = new VectorAssembler().setInputCols(cat_cols ++ cont_cols).setOutputCol("featureVector")
val indexer = new VectorIndexer().setInputCol(assembler.getOutputCol).setOutputCol("catFeatureVector")

val trackerConf = TrackerConf(0, "scala")
val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101, "num_workers" -> 1, "tracker_conf" -> trackerConf)).setMissing(-1).setLabelCol("MEDV").setFeaturesCol(indexer.getOutputCol)
val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101)).setMissing(-1).setLabelCol("MEDV").setFeaturesCol(indexer.getOutputCol)

val pipeline = new Pipeline().setStages(Array(assembler, indexer, regressor))
val pipelineModel = pipeline.fit(df)
Expand Down
5 changes: 2 additions & 3 deletions pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import java.io.File

import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostClassifier}
import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature._
import org.apache.spark.ml.linalg.Vector
Expand All @@ -22,8 +22,7 @@ val labelIndexerModel = labelIndexer.fit(df)

val assembler = new VectorAssembler().setInputCols(Array("Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width")).setOutputCol("featureVector")

val trackerConf = TrackerConf(0, "scala")
val classifier = new XGBoostClassifier(Map("objective" -> "multi:softprob", "num_class" -> 3, "num_round" -> 17, "tracker_conf" -> trackerConf)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(assembler.getOutputCol)
val classifier = new XGBoostClassifier(Map("objective" -> "multi:softprob", "num_class" -> 3)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(assembler.getOutputCol)

val pipeline = new Pipeline().setStages(Array(labelIndexer, assembler, classifier))
val pipelineModel = pipeline.fit(df)
Expand Down

0 comments on commit 88aec17

Please sign in to comment.