diff --git a/jvm-packages/.gitignore b/jvm-packages/.gitignore
index 6d3f7b7cbf4b..e2dc7967aae3 100644
--- a/jvm-packages/.gitignore
+++ b/jvm-packages/.gitignore
@@ -1,2 +1,4 @@
tracker.py
build.sh
+xgboost4j-tester/pom.xml
+xgboost4j-tester/iris.csv
diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala
index 6d676b0ae2b3..b8da31c09a1a 100644
--- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala
+++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala
@@ -20,10 +20,9 @@ import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature._
import org.apache.spark.ml.tuning._
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types._
-
-import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassifier, XGBoostClassificationModel}
+import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier}
// this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
@@ -50,6 +49,13 @@ object SparkMLlibPipeline {
.appName("XGBoost4J-Spark Pipeline Example")
.getOrCreate()
+ run(spark, inputPath, nativeModelPath, pipelineModelPath, treeMethod, numWorkers)
+ .show(false)
+ }
+ private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String,
+ pipelineModelPath: String, treeMethod: String,
+ numWorkers: Int): DataFrame = {
+
// Load dataset
val schema = new StructType(Array(
StructField("sepal length", DoubleType, true),
@@ -90,11 +96,11 @@ object SparkMLlibPipeline {
val labelConverter = new IndexToString()
.setInputCol("prediction")
.setOutputCol("realLabel")
- .setLabels(labelIndexer.labels)
+ .setLabels(labelIndexer.labelsArray(0))
val pipeline = new Pipeline()
.setStages(Array(assembler, labelIndexer, booster, labelConverter))
- val model = pipeline.fit(training)
+ val model: PipelineModel = pipeline.fit(training)
// Batch prediction
val prediction = model.transform(test)
@@ -136,6 +142,6 @@ object SparkMLlibPipeline {
// Load a saved model and serving
val model2 = PipelineModel.load(pipelineModelPath)
- model2.transform(test).show(false)
+ model2.transform(test)
}
}
diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala
index 17a32bc09e72..a7886f52490e 100644
--- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala
+++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala
@@ -17,9 +17,8 @@
package ml.dmlc.xgboost4j.scala.example.spark
import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
-
import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
// this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
@@ -38,6 +37,12 @@ object SparkTraining {
val spark = SparkSession.builder().getOrCreate()
val inputPath = args(0)
+ val results: DataFrame = run(spark, inputPath, treeMethod, numWorkers)
+ results.show()
+ }
+
+private[spark] def run(spark: SparkSession, inputPath: String,
+ treeMethod: String, numWorkers: Int): DataFrame = {
val schema = new StructType(Array(
StructField("sepal length", DoubleType, true),
StructField("sepal width", DoubleType, true),
@@ -81,7 +86,6 @@ object SparkTraining {
setFeaturesCol("features").
setLabelCol("classIndex")
val xgbClassificationModel = xgbClassifier.fit(train)
- val results = xgbClassificationModel.transform(test)
- results.show()
+ xgbClassificationModel.transform(test)
}
}
diff --git a/jvm-packages/xgboost4j-example/src/test/resources/iris.csv b/jvm-packages/xgboost4j-example/src/test/resources/iris.csv
new file mode 100644
index 000000000000..2bf7d09d28c0
--- /dev/null
+++ b/jvm-packages/xgboost4j-example/src/test/resources/iris.csv
@@ -0,0 +1,150 @@
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
+5.8,4.0,1.2,0.2,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,3.4,1.4,0.2,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+5.4,3.4,1.5,0.4,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.5,4.2,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.2,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.5,3.5,1.3,0.2,Iris-setosa
+4.9,3.6,1.4,0.1,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.1,3.8,1.9,0.4,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+5.3,3.7,1.5,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+7.0,3.2,4.7,1.4,Iris-versicolor
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.9,3.1,4.9,1.5,Iris-versicolor
+5.5,2.3,4.0,1.3,Iris-versicolor
+6.5,2.8,4.6,1.5,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+6.3,3.3,4.7,1.6,Iris-versicolor
+4.9,2.4,3.3,1.0,Iris-versicolor
+6.6,2.9,4.6,1.3,Iris-versicolor
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.0,2.0,3.5,1.0,Iris-versicolor
+5.9,3.0,4.2,1.5,Iris-versicolor
+6.0,2.2,4.0,1.0,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+5.6,2.9,3.6,1.3,Iris-versicolor
+6.7,3.1,4.4,1.4,Iris-versicolor
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.8,2.7,4.1,1.0,Iris-versicolor
+6.2,2.2,4.5,1.5,Iris-versicolor
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.9,3.2,4.8,1.8,Iris-versicolor
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.0,2.9,4.5,1.5,Iris-versicolor
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+6.0,2.7,5.1,1.6,Iris-versicolor
+5.4,3.0,4.5,1.5,Iris-versicolor
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.3,2.3,4.4,1.3,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.6,4.4,1.2,Iris-versicolor
+6.1,3.0,4.6,1.4,Iris-versicolor
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+6.2,2.9,4.3,1.3,Iris-versicolor
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+6.3,3.3,6.0,2.5,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+7.1,3.0,5.9,2.1,Iris-virginica
+6.3,2.9,5.6,1.8,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+4.9,2.5,4.5,1.7,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+6.7,2.5,5.8,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+6.5,3.2,5.1,2.0,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.8,3.0,5.5,2.1,Iris-virginica
+5.7,2.5,5.0,2.0,Iris-virginica
+5.8,2.8,5.1,2.4,Iris-virginica
+6.4,3.2,5.3,2.3,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+6.0,2.2,5.0,1.5,Iris-virginica
+6.9,3.2,5.7,2.3,Iris-virginica
+5.6,2.8,4.9,2.0,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+6.2,2.8,4.8,1.8,Iris-virginica
+6.1,3.0,4.9,1.8,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.9,3.8,6.4,2.0,Iris-virginica
+6.4,2.8,5.6,2.2,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.1,2.6,5.6,1.4,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.4,3.1,5.5,1.8,Iris-virginica
+6.0,3.0,4.8,1.8,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.7,3.1,5.6,2.4,Iris-virginica
+6.9,3.1,5.1,2.3,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+6.8,3.2,5.9,2.3,Iris-virginica
+6.7,3.3,5.7,2.5,Iris-virginica
+6.7,3.0,5.2,2.3,Iris-virginica
+6.3,2.5,5.0,1.9,Iris-virginica
+6.5,3.0,5.2,2.0,Iris-virginica
+6.2,3.4,5.4,2.3,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica
diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala
new file mode 100644
index 000000000000..047042bc5782
--- /dev/null
+++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala
@@ -0,0 +1,95 @@
+/*
+ Copyright (c) 2014-2023 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package ml.dmlc.xgboost4j.scala.example.spark
+
+import org.apache.spark.sql.SparkSession
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.funsuite.AnyFunSuite
+
+import java.io.File
+import java.nio.file.Paths
+
+/**
+ * Smoke tests for the XGBoost4J-Spark example programs.
+ *
+ * Each test runs an example's `run` entry point against the `iris.csv` test resource on a
+ * local Spark session shared by the whole suite.
+ */
+class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll {
+  /** Number of local Spark task slots: one per available processor, capped at 4. */
+  protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4)
+
+  /**
+   * Filesystem path of the `iris.csv` test resource.
+   *
+   * The resource URL is converted through its URI rather than `URL.getPath`, because `getPath`
+   * leaves percent-encoded characters (e.g. `%20` for a space) in the returned string.
+   */
+  private val iris: String = Option(getClass.getClassLoader.getResource("iris.csv"))
+    .map(url => Paths.get(url.toURI).toString)
+    .getOrElse(throw new IllegalStateException("iris.csv not found on the test classpath"))
+
+  private var spark: SparkSession = _
+
+  /** Creates the shared local Spark session once, before any test runs. */
+  override def beforeAll(): Unit = {
+    if (spark == null) {
+      spark = SparkSession
+        .builder()
+        .appName("XGBoost4J-Spark Pipeline Example")
+        .master(s"local[${numWorkers}]")
+        .config("spark.ui.enabled", value = false)
+        .config("spark.driver.memory", "512m")
+        .config("spark.barrier.sync.timeout", 10)
+        .config("spark.task.cpus", 1)
+        .getOrCreate()
+      spark.sparkContext.setLogLevel("ERROR")
+    }
+  }
+
+  /** Stops the shared session and removes leftover files named after the Spark application. */
+  override def afterAll(): Unit = {
+    if (spark != null) {
+      // Read the application name while the context is still running.
+      val appName = spark.sparkContext.appName
+      spark.stop()
+      cleanExternalCache(appName)
+      spark = null
+    }
+  }
+
+  /**
+   * Deletes, on a best-effort basis, every entry of the current working directory whose name
+   * starts with `prefix`.
+   */
+  private def cleanExternalCache(prefix: String): Unit = {
+    // File.listFiles returns null (not an empty array) when the directory cannot be listed.
+    Option(new File(".").listFiles())
+      .getOrElse(Array.empty[File])
+      .filter(_.getName.startsWith(prefix))
+      .foreach(_.delete())
+  }
+
+  test("Smoke test for SparkMLlibPipeline example") {
+    SparkMLlibPipeline.run(spark, iris, "target/native-model",
+      "target/pipeline-model", "auto", 2)
+  }
+
+  test("Smoke test for SparkTraining example") {
+    // Reuse the session created in beforeAll instead of building a second, identical one.
+    SparkTraining.run(spark, iris, "auto", 2)
+  }
+}
diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py
index c0945075592e..b9c274c28a4d 100644
--- a/jvm-packages/xgboost4j-tester/generate_pom.py
+++ b/jvm-packages/xgboost4j-tester/generate_pom.py
@@ -8,7 +8,7 @@
4.0.0
ml.dmlc
- xgboost4j-tester_${scala.binary.version}
+ xgboost4j-tester_{scala_binary_version}
1.0-SNAPSHOT
xgboost4j-tester
@@ -17,16 +17,19 @@
UTF-8
{maven_compiler_source}
{maven_compiler_target}
+ 4.13.2
{spark_version}
{scala_version}
+ 3.2.15
{scala_binary_version}
+ 5.5.0
-
+
com.esotericsoftware
kryo
- 4.0.2
+ ${{kryo.version}}
org.scala-lang
@@ -48,29 +51,12 @@
commons-logging
1.2
-
- com.typesafe.akka
- akka-testkit_${{scala.binary.version}}
- 2.6.20
- test
-
org.scalatest
scalatest_${{scala.binary.version}}
- 3.0.8
- test
-
-
- org.scalactic
- scalactic_${{scala.binary.version}}
- 3.2.15
+ ${{scalatest.version}}
test
-
- org.apache.commons
- commons-lang3
- 3.9
-
org.apache.spark
spark-core_${{scala.binary.version}}
@@ -92,7 +78,7 @@
junit
junit
- 4.13.2
+ ${{junit.version}}
test
@@ -122,36 +108,9 @@
-
-
- maven-clean-plugin
- 3.1.0
-
-
-
- maven-resources-plugin
- 3.0.2
-
-
- maven-compiler-plugin
- 3.8.0
-
-
- maven-jar-plugin
- 3.0.2
-
-
- maven-install-plugin
- 2.5.2
-
-
- maven-deploy-plugin
- 2.8.2
-
org.apache.maven.plugins
maven-assembly-plugin
- 2.4
jar-with-dependencies
@@ -171,22 +130,12 @@
-
-
- maven-site-plugin
- 3.7.1
-
-
- maven-project-info-reports-plugin
- 3.0.0
-
org.apache.maven.plugins
maven-surefire-plugin
- 2.22.1
- ml.dmlc:xgboost4j_2.12
+ ml.dmlc:xgboost4j_${{scala.binary.version}}
diff --git a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java b/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java
deleted file mode 100644
index 2df69374806a..000000000000
--- a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package ml.dmlc.xgboost4j.tester;
-
-import static org.junit.Assert.assertTrue;
-
-import org.junit.Test;
-
-/**
- * Unit test for simple App.
- */
-public class AppTest
-{
- /**
- * Rigorous Test :-)
- */
- @Test
- public void shouldAnswerWithTrue()
- {
- assertTrue( true );
- }
-}
diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh
index 1de43bbd0c6e..33cfffe713bc 100755
--- a/tests/buildkite/build-jvm-packages.sh
+++ b/tests/buildkite/build-jvm-packages.sh
@@ -4,11 +4,18 @@ set -euo pipefail
source tests/buildkite/conftest.sh
-echo "--- Build XGBoost JVM packages"
+echo "--- Build XGBoost JVM packages scala 2.12"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION}
+
+echo "--- Build XGBoost JVM packages scala 2.13"
+
+tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \
+ ${SPARK_VERSION} "" "" "true"
+
echo "--- Stash XGBoost4J JARs"
buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar"
+buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar"
buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar"
diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh
index cf9270c1162c..957dd443c5ee 100755
--- a/tests/buildkite/conftest.sh
+++ b/tests/buildkite/conftest.sh
@@ -25,7 +25,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=23.02
-SPARK_VERSION=3.1.1
+SPARK_VERSION=3.4.0
JDK_VERSION=8
if [[ -z ${BUILDKITE:-} ]]
diff --git a/tests/ci_build/Dockerfile.jvm_cross b/tests/ci_build/Dockerfile.jvm_cross
index 6d9c5c57f294..fdfae310aac5 100644
--- a/tests/ci_build/Dockerfile.jvm_cross
+++ b/tests/ci_build/Dockerfile.jvm_cross
@@ -20,10 +20,14 @@ RUN \
wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \
tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \
ln -s /opt/apache-maven-3.6.1/ /opt/maven && \
- # Spark
- wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \
- tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \
- ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark
+ # Spark with scala 2.12
+ mkdir -p /opt/spark-scala-2.12 && \
+ wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \
+ tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \
+ # Spark with scala 2.13
+ mkdir -p /opt/spark-scala-2.13 && \
+ wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \
+ tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13
ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH
diff --git a/tests/ci_build/test_jvm_cross.sh b/tests/ci_build/test_jvm_cross.sh
index 378846d651db..18265cf015d3 100755
--- a/tests/ci_build/test_jvm_cross.sh
+++ b/tests/ci_build/test_jvm_cross.sh
@@ -6,37 +6,56 @@ set -x
# Initialize local Maven repository
./tests/ci_build/initialize_maven.sh
-# Get version number of XGBoost4J and other auxiliary information
cd jvm-packages
+jvm_packages_dir=`pwd`
+# Get version number of XGBoost4J and other auxiliary information
xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
maven_compiler_source=$(mvn help:evaluate -Dexpression=maven.compiler.source -q -DforceStdout)
maven_compiler_target=$(mvn help:evaluate -Dexpression=maven.compiler.target -q -DforceStdout)
spark_version=$(mvn help:evaluate -Dexpression=spark.version -q -DforceStdout)
-scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout)
-scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)
-
-# Install XGBoost4J JAR into local Maven repository
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
-
-cd xgboost4j-tester
-# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
-python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
-# Run unit tests with XGBoost4J
-mvn --no-transfer-progress package
-
-# Run integration tests with XGBoost4J
-java -jar ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar
-
-# Run integration tests with XGBoost4J-Spark
-if [ ! -z "$RUN_INTEGRATION_TEST" ]
-then
+
+if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
+ cd $jvm_packages_dir/xgboost4j-tester
python3 get_iris.py
- spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv
- spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model
+ cd $jvm_packages_dir
fi
+# Build and test once per supported Scala version by selecting the matching Maven profile;
+# the empty profile string selects the default build (Scala 2.12 at the moment).
+for _maven_profile_string in "" "-Pdefault,scala-2.13"; do
+  # $_maven_profile_string is intentionally unquoted: when it is empty it must expand to
+  # nothing rather than to an empty argument for mvn.
+  scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout)
+  scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout)
+
+  # Install XGBoost4J JAR into local Maven repository
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+  mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar
+
+  cd xgboost4j-tester
+  # Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests
+  python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version}
+  # Build package and unit tests with XGBoost4J
+  mvn --no-transfer-progress clean package
+  xgboost4j_tester_jar="$jvm_packages_dir/xgboost4j-tester/target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar"
+  # Run integration tests with XGBoost4J
+  java -jar "$xgboost4j_tester_jar"
+
+  # Run integration tests with XGBoost4J-Spark
+  if [ ! -z "$RUN_INTEGRATION_TEST" ]; then
+    # Pick the Spark distribution that was built for this Scala binary version.
+    case "$scala_binary_version" in
+      2.12|2.13)
+        spark_submit="/opt/spark-scala-${scala_binary_version}/bin/spark-submit"
+        ;;
+      *)
+        # Fail instead of silently skipping the Spark integration tests.
+        echo "Unexpected scala version: $scala_version ($scala_binary_version)." >&2
+        exit 1
+        ;;
+    esac
+    iris_csv="$jvm_packages_dir/xgboost4j-tester/iris.csv"
+    # Changing directory so that we do not mix code and resulting files
+    cd target
+    "$spark_submit" --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' "$xgboost4j_tester_jar" "$iris_csv"
+    "$spark_submit" --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' "$xgboost4j_tester_jar" "$iris_csv" "${PWD}/native_model-${scala_version}" "${PWD}/pipeline_model-${scala_version}"
+  fi
+  cd "$jvm_packages_dir"
+done
+
set +x
set +e