diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 107eee33..d22b3c5e 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -22,4 +22,4 @@ jobs:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2
- - run: mvn -B package --file pom.xml
+ - run: mvn -Dxgboost4j-spark.version=2.0.1 -B package --file pom.xml
diff --git a/README.md b/README.md
index 5dbb2ef5..5308d9ea 100644
--- a/README.md
+++ b/README.md
@@ -109,6 +109,14 @@ Java library and command-line application for converting Apache Spark ML pipelin
* [`tuning.TrainValidationSplitModel`](https://spark.apache.org/docs/latest/api/java/org/apache/spark/ml/tuning/TrainValidationSplitModel.html)
+
+ JPMML-SparkML
+
+ * Feature transformers:
+ * `org.jpmml.sparkml.feature.InvalidCategoryTransformer`
+ * `org.jpmml.sparkml.feature.SparseToDenseTransformer`
+
+
LightGBM
@@ -120,8 +128,6 @@ Java library and command-line application for converting Apache Spark ML pipelin
XGBoost
- * Feature transformers:
- * `org.jpmml.sparkml.xgboost.SparseToDenseTransformer`
* Prediction models:
* [`ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel`](https://xgboost.readthedocs.io/en/latest/jvm/scaladocs/xgboost4j-spark/ml/dmlc/xgboost4j/scala/spark/XGBoostClassificationModel.html)
* [`ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel`](https://xgboost.readthedocs.io/en/latest/jvm/scaladocs/xgboost4j-spark/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressionModel.html)
@@ -247,6 +253,7 @@ spark-submit --master local --class org.jpmml.sparkml.example.Main pmml-sparkml-
# Documentation #
+* [Training PySpark LightGBM pipelines](https://openscoring.io/blog/2023/05/26/pyspark_lightgbm_pipeline/)
* [Converting logistic regression models to PMML documents](https://openscoring.io/blog/2020/01/19/converting_logistic_regression_pmml/#apache-spark)
* [Deploying Apache Spark ML pipeline models on Openscoring REST web service](https://openscoring.io/blog/2020/02/16/deploying_sparkml_pipeline_openscoring_rest/)
* [Converting Apache Spark ML pipeline models to PMML documents](https://openscoring.io/blog/2018/07/09/converting_sparkml_pipeline_pmml/)
diff --git a/pmml-sparkml-example/pom.xml b/pmml-sparkml-example/pom.xml
index b417b7af..5ea901b5 100644
--- a/pmml-sparkml-example/pom.xml
+++ b/pmml-sparkml-example/pom.xml
@@ -67,7 +67,7 @@
org.apache.maven.plugins
maven-shade-plugin
- 3.4.1
+ 3.5.1
package
diff --git a/pmml-sparkml-example/src/main/java/org/jpmml/sparkml/example/Main.java b/pmml-sparkml-example/src/main/java/org/jpmml/sparkml/example/Main.java
index 83d04031..5fc1278c 100644
--- a/pmml-sparkml-example/src/main/java/org/jpmml/sparkml/example/Main.java
+++ b/pmml-sparkml-example/src/main/java/org/jpmml/sparkml/example/Main.java
@@ -37,6 +37,7 @@
import org.apache.spark.sql.types.StructType;
import org.dmg.pmml.PMML;
import org.jpmml.model.metro.MetroJAXBUtil;
+import org.jpmml.sparkml.ArchiveUtil;
import org.jpmml.sparkml.PMMLBuilder;
import org.jpmml.sparkml.PipelineModelUtil;
import org.jpmml.sparkml.model.HasPredictionModelOptions;
@@ -187,7 +188,7 @@ private void run() throws Exception {
logger.info("Loading pipeline model..");
if(this.pipelineInput.isFile()){
- this.pipelineInput = PipelineModelUtil.uncompress(this.pipelineInput);
+ this.pipelineInput = ArchiveUtil.uncompress(this.pipelineInput);
}
long begin = System.currentTimeMillis();
diff --git a/pmml-sparkml-lightgbm/src/test/java/org/jpmml/sparkml/lightgbm/testing/LightGBMTest.java b/pmml-sparkml-lightgbm/src/test/java/org/jpmml/sparkml/lightgbm/testing/LightGBMTest.java
index 5a5951d4..4c56ba11 100644
--- a/pmml-sparkml-lightgbm/src/test/java/org/jpmml/sparkml/lightgbm/testing/LightGBMTest.java
+++ b/pmml-sparkml-lightgbm/src/test/java/org/jpmml/sparkml/lightgbm/testing/LightGBMTest.java
@@ -24,6 +24,7 @@
import java.util.function.Predicate;
import com.google.common.base.Equivalence;
+import org.jpmml.converter.testing.Datasets;
import org.jpmml.converter.testing.OptionsUtil;
import org.jpmml.evaluator.ResultField;
import org.jpmml.evaluator.testing.PMMLEquivalence;
@@ -34,7 +35,7 @@
import org.junit.BeforeClass;
import org.junit.Test;
-public class LightGBMTest extends SparkMLEncoderBatchTest {
+public class LightGBMTest extends SparkMLEncoderBatchTest implements Datasets {
public LightGBMTest(){
super(new PMMLEquivalence(1e-14, 1e-14));
@@ -66,17 +67,27 @@ public List