From f5e438e87b69dfd6073a8827559e8e5b2f99e459 Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Wed, 27 Dec 2023 20:23:08 +0200 Subject: [PATCH 1/6] Updated version information --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 83058e4c..cc8b6d3e 100644 --- a/README.md +++ b/README.md @@ -143,13 +143,13 @@ Java library and command-line application for converting Apache Spark ML pipelin JPMML-SparkML library JAR file (together with accompanying Java source and Javadocs JAR files) is released via [Maven Central Repository](https://repo1.maven.org/maven2/org/jpmml/). -The current version is **2.0.2** (27 April, 2023). +The current version is **2.0.3** (27 December, 2023). ```xml org.jpmml pmml-sparkml - 2.0.2 + 2.0.3 ``` From b80920370478a83cf7d0538ae615f42e6503339b Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Wed, 27 Dec 2023 22:16:20 +0200 Subject: [PATCH 2/6] Updated version information --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3824a1c8..9af25889 100644 --- a/README.md +++ b/README.md @@ -143,13 +143,13 @@ Java library and command-line application for converting Apache Spark ML pipelin JPMML-SparkML library JAR file (together with accompanying Java source and Javadocs JAR files) is released via [Maven Central Repository](https://repo1.maven.org/maven2/org/jpmml/). -The current version is **2.1.2** (27 April, 2023). +The current version is **2.1.3** (27 December, 2023). 
```xml org.jpmml pmml-sparkml - 2.1.2 + 2.1.3 ``` From ec37aeb1c90deb78b8bc5147d2d19b28319efb1c Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Wed, 27 Dec 2023 23:16:27 +0200 Subject: [PATCH 3/6] Updated version information --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8a625617..a1fc2900 100644 --- a/README.md +++ b/README.md @@ -143,13 +143,13 @@ Java library and command-line application for converting Apache Spark ML pipelin JPMML-SparkML library JAR file (together with accompanying Java source and Javadocs JAR files) is released via [Maven Central Repository](https://repo1.maven.org/maven2/org/jpmml/). -The current version is **2.2.2** (27 April, 2023). +The current version is **2.2.3** (27 December, 2023). ```xml org.jpmml pmml-sparkml - 2.2.2 + 2.2.3 ``` From 8344a0db8714cf39459ea7bb66b85e4200650e80 Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Fri, 29 Dec 2023 08:46:25 +0200 Subject: [PATCH 4/6] Improved GitHub Actions CI configuration --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index d22b3c5e..7a61ed1e 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -2,7 +2,7 @@ name: maven on: push: - branches: [ '2.0.X', '2.1.X', '2.2.X', '2.3.X', master ] + branches: [ '2.0.X', '2.1.X', '2.2.X', '2.3.X', '2.4.X', master ] jobs: build: From 6440d0d20c1f12b088d0dba32233122fe970b318 Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Fri, 29 Dec 2023 08:56:29 +0200 Subject: [PATCH 5/6] Updated documentation --- README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cc8b6d3e..8671f2cc 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,8 @@ Java library and command-line application for converting Apache Spark ML pipelin
Apache Spark ML + Examples: [main.py](https://github.com/jpmml/jpmml-sparkml/blob/2.0.X/pmml-sparkml/src/test/resources/main.py) + * Feature extractors, transformers and selectors: * [`feature.Binarizer`](https://spark.apache.org/docs/latest/api/java/org/apache/spark/ml/feature/Binarizer.html) * [`feature.Bucketizer`](https://spark.apache.org/docs/latest/api/java/org/apache/spark/ml/feature/Bucketizer.html) @@ -120,6 +122,8 @@ Java library and command-line application for converting Apache Spark ML pipelin
LightGBM + Examples: [LightGBMAuditNA.scala](https://github.com/jpmml/jpmml-sparkml/blob/2.0.X/pmml-sparkml-lightgbm/src/test/resources/LightGBMAuditNA.scala), [LightGBMAutoNA.scala](https://github.com/jpmml/jpmml-sparkml/blob/2.0.X/pmml-sparkml-lightgbm/src/test/resources/LightGBMAutoNA.scala), etc. + * Prediction models: * [`com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel`](https://mmlspark.blob.core.windows.net/docs/0.9.5/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassificationModel.html) * [`com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel`](https://mmlspark.blob.core.windows.net/docs/0.9.5/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressionModel.html) @@ -128,6 +132,8 @@ Java library and command-line application for converting Apache Spark ML pipelin 
XGBoost + Examples: [XGBoostAuditNA.scala](https://github.com/jpmml/jpmml-sparkml/blob/2.0.X/pmml-sparkml-xgboost/src/test/resources/XGBoostAuditNA.scala), [XGBoostAutoNA.scala](https://github.com/jpmml/jpmml-sparkml/blob/2.0.X/pmml-sparkml-xgboost/src/test/resources/XGBoostAutoNA.scala), etc. + * Prediction models: * [`ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel`](https://xgboost.readthedocs.io/en/latest/jvm/scaladocs/xgboost4j-spark/ml/dmlc/xgboost4j/scala/spark/XGBoostClassificationModel.html) * [`ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel`](https://xgboost.readthedocs.io/en/latest/jvm/scaladocs/xgboost4j-spark/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressionModel.html) @@ -135,7 +141,7 @@ Java library and command-line application for converting Apache Spark ML pipelin # Prerequisites # -* Apache Spark 3.0.X, 3.1.X, 3.2.X, 3.3.X or 3.4.X. +* Apache Spark 3.0.X, 3.1.X, 3.2.X, 3.3.X, 3.4.X or 3.5.X. # Installation # @@ -163,7 +169,8 @@ Active development branches: | 3.1.X | [`2.1.X`](https://github.com/jpmml/jpmml-sparkml/tree/2.1.X) | | 3.2.X | [`2.2.X`](https://github.com/jpmml/jpmml-sparkml/tree/2.2.X) | | 3.3.X | [`2.3.X`](https://github.com/jpmml/jpmml-sparkml/tree/2.3.X) | -| 3.4.X | [`master`](https://github.com/jpmml/jpmml-sparkml/tree/master) | +| 3.4.X | [`2.4.X`](https://github.com/jpmml/jpmml-sparkml/tree/2.4.X) | +| 3.5.X | [`master`](https://github.com/jpmml/jpmml-sparkml/tree/master) | Archived development branches: From 88aec1719af85ee0bcf0e9d3831f1a617ce1ff12 Mon Sep 17 00:00:00 2001 From: Villu Ruusmann Date: Fri, 29 Dec 2023 09:13:29 +0200 Subject: [PATCH 6/6] Updated test resources After upgrading from XGBoost 1.7(.6) to 2.0(.1), it seems that XGBoost estimators no longer require setting the 'num_workers' and 'tracker_conf' parameters. 
--- pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala | 5 ++--- pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala | 5 ++--- pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala | 5 ++--- pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala | 5 ++--- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala index a0ba697d..09c551f2 100644 --- a/pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala +++ b/pmml-sparkml-xgboost/src/test/resources/XGBoostAudit.scala @@ -1,6 +1,6 @@ import java.io.File -import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostClassifier} +import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature._ import org.apache.spark.ml.linalg.Vector @@ -25,8 +25,7 @@ val assembler = new VectorAssembler().setInputCols(ohe.getOutputCols ++ cont_col val sparse2dense = new SparseToDenseTransformer().setInputCol(assembler.getOutputCol).setOutputCol("denseFeatureVec") -val trackerConf = TrackerConf(0, "scala") -val classifier = new XGBoostClassifier(Map("objective" -> "binary:logistic", "num_round" -> 101, "tracker_conf" -> trackerConf)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(sparse2dense.getOutputCol) +val classifier = new XGBoostClassifier(Map("objective" -> "binary:logistic", "num_round" -> 101)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(sparse2dense.getOutputCol) val pipeline = new Pipeline().setStages(Array(labelIndexer, indexer, ohe, assembler, sparse2dense, classifier)) val pipelineModel = pipeline.fit(df) diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala index 5ce7df79..756facb1 100644 --- a/pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala +++ b/pmml-sparkml-xgboost/src/test/resources/XGBoostAuto.scala @@ 
-1,6 +1,6 @@ import java.io.File -import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostRegressor} +import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature._ import org.apache.spark.sql.types.{FloatType, StringType} @@ -21,8 +21,7 @@ val assembler = new VectorAssembler().setInputCols(ohe.getOutputCols ++ cont_col val sparse2dense = new SparseToDenseTransformer().setInputCol(assembler.getOutputCol).setOutputCol("denseFeatureVec") -val trackerConf = TrackerConf(0, "scala") -val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101, "num_workers" -> 1, "tracker_conf" -> trackerConf)).setLabelCol("mpg").setFeaturesCol(sparse2dense.getOutputCol) +val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101)).setLabelCol("mpg").setFeaturesCol(sparse2dense.getOutputCol) val pipeline = new Pipeline().setStages(Array(indexer, ohe, assembler, sparse2dense, regressor)) val pipelineModel = pipeline.fit(df) diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala index a878d3d8..8156a0d8 100644 --- a/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala +++ b/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala @@ -1,6 +1,6 @@ import java.io.File -import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostRegressor} +import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature._ import org.apache.spark.sql.types.FloatType @@ -16,8 +16,7 @@ val cont_cols = Array("CRIM", "ZN", "INDUS", "NOX", "RM", "AGE", "DIS", "PTRATIO val assembler = new VectorAssembler().setInputCols(cat_cols ++ cont_cols).setOutputCol("featureVector") val indexer = new VectorIndexer().setInputCol(assembler.getOutputCol).setOutputCol("catFeatureVector") -val trackerConf = TrackerConf(0, "scala") -val regressor = 
new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101, "num_workers" -> 1, "tracker_conf" -> trackerConf)).setMissing(-1).setLabelCol("MEDV").setFeaturesCol(indexer.getOutputCol) +val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101)).setMissing(-1).setLabelCol("MEDV").setFeaturesCol(indexer.getOutputCol) val pipeline = new Pipeline().setStages(Array(assembler, indexer, regressor)) val pipelineModel = pipeline.fit(df) diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala index 7771b385..4a088e54 100644 --- a/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala +++ b/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala @@ -1,6 +1,6 @@ import java.io.File -import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostClassifier} +import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature._ import org.apache.spark.ml.linalg.Vector @@ -22,8 +22,7 @@ val labelIndexerModel = labelIndexer.fit(df) val assembler = new VectorAssembler().setInputCols(Array("Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width")).setOutputCol("featureVector") -val trackerConf = TrackerConf(0, "scala") -val classifier = new XGBoostClassifier(Map("objective" -> "multi:softprob", "num_class" -> 3, "num_round" -> 17, "tracker_conf" -> trackerConf)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(assembler.getOutputCol) +val classifier = new XGBoostClassifier(Map("objective" -> "multi:softprob", "num_class" -> 3, "num_round" -> 17)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(assembler.getOutputCol) val pipeline = new Pipeline().setStages(Array(labelIndexer, assembler, classifier)) val pipelineModel = pipeline.fit(df)