diff --git a/jvm-packages/.gitignore b/jvm-packages/.gitignore index 6d3f7b7cbf4b..e2dc7967aae3 100644 --- a/jvm-packages/.gitignore +++ b/jvm-packages/.gitignore @@ -1,2 +1,4 @@ tracker.py build.sh +xgboost4j-tester/pom.xml +xgboost4j-tester/iris.csv diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala index 6d676b0ae2b3..b8da31c09a1a 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala @@ -20,10 +20,9 @@ import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator import org.apache.spark.ml.feature._ import org.apache.spark.ml.tuning._ -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.types._ - -import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassifier, XGBoostClassificationModel} +import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostClassifier} // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris) @@ -50,6 +49,13 @@ object SparkMLlibPipeline { .appName("XGBoost4J-Spark Pipeline Example") .getOrCreate() + run(spark, inputPath, nativeModelPath, pipelineModelPath, treeMethod, numWorkers) + .show(false) + } + private[spark] def run(spark: SparkSession, inputPath: String, nativeModelPath: String, + pipelineModelPath: String, treeMethod: String, + numWorkers: Int): DataFrame = { + // Load dataset val schema = new StructType(Array( StructField("sepal length", DoubleType, true), @@ -90,11 +96,11 @@ object SparkMLlibPipeline { val labelConverter = new IndexToString() .setInputCol("prediction") .setOutputCol("realLabel") - 
.setLabels(labelIndexer.labels) + .setLabels(labelIndexer.labelsArray(0)) val pipeline = new Pipeline() .setStages(Array(assembler, labelIndexer, booster, labelConverter)) - val model = pipeline.fit(training) + val model: PipelineModel = pipeline.fit(training) // Batch prediction val prediction = model.transform(test) @@ -136,6 +142,6 @@ object SparkMLlibPipeline { // Load a saved model and serving val model2 = PipelineModel.load(pipelineModelPath) - model2.transform(test).show(false) + model2.transform(test) } } diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala index 17a32bc09e72..a7886f52490e 100644 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala +++ b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkTraining.scala @@ -17,9 +17,8 @@ package ml.dmlc.xgboost4j.scala.example.spark import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier - import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler} -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType} // this example works with Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris) @@ -38,6 +37,12 @@ object SparkTraining { val spark = SparkSession.builder().getOrCreate() val inputPath = args(0) + val results: DataFrame = run(spark, inputPath, treeMethod, numWorkers) + results.show() + } + +private[spark] def run(spark: SparkSession, inputPath: String, + treeMethod: String, numWorkers: Int): DataFrame = { val schema = new StructType(Array( StructField("sepal length", DoubleType, true), StructField("sepal width", DoubleType, true), @@ -81,7 +86,6 @@ object SparkTraining { setFeaturesCol("features"). 
setLabelCol("classIndex") val xgbClassificationModel = xgbClassifier.fit(train) - val results = xgbClassificationModel.transform(test) - results.show() + xgbClassificationModel.transform(test) } } diff --git a/jvm-packages/xgboost4j-example/src/test/resources/iris.csv b/jvm-packages/xgboost4j-example/src/test/resources/iris.csv new file mode 100644 index 000000000000..2bf7d09d28c0 --- /dev/null +++ b/jvm-packages/xgboost4j-example/src/test/resources/iris.csv @@ -0,0 +1,150 @@ +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.2,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.6,1.4,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor 
+6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica 
+5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala new file mode 100644 index 000000000000..047042bc5782 --- /dev/null +++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkExamplesTest.scala @@ -0,0 +1,77 @@ +/* + Copyright (c) 2014-2023 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package ml.dmlc.xgboost4j.scala.example.spark + +import org.apache.spark.sql.SparkSession +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite + +import java.io.File + +/** Smoke tests that run the Spark example entry points on iris.csv with a local SparkSession. */ +class SparkExamplesTest extends AnyFunSuite with BeforeAndAfterAll { + protected val numWorkers: Int = scala.math.min(Runtime.getRuntime.availableProcessors(), 4) + private val iris = getClass.getClassLoader.getResource("iris.csv").getPath + private var spark: SparkSession = _ + + /** Creates the local SparkSession once, before any test in the suite runs. */ + override def beforeAll(): Unit = { + if (spark == null) { + spark = SparkSession + .builder() + .appName("XGBoost4J-Spark Pipeline Example") + .master(s"local[${numWorkers}]") + .config("spark.ui.enabled", value = false) + .config("spark.driver.memory", "512m") + .config("spark.barrier.sync.timeout", 10) + .config("spark.task.cpus", 1) + .getOrCreate() + spark.sparkContext.setLogLevel("ERROR") + } + } + + /** Stops the session, then deletes working-directory files prefixed with the app name. */ + override def afterAll(): Unit = { + if (spark != null) { + /* Capture the name first so nothing is read from the context after stop(). */ + val appName = spark.sparkContext.appName + spark.stop() + cleanExternalCache(appName) + spark = null + } + } + + /** Deletes files in the current directory whose name starts with `prefix` (best effort). */ + private def cleanExternalCache(prefix: String): Unit = { + /* listFiles() returns null, not an empty array, when the directory cannot be listed. */ + val candidates = Option(new File(".").listFiles()).getOrElse(Array.empty[File]) + candidates.filter(_.getName.startsWith(prefix)).foreach(_.delete()) + } + + test("Smoke test for SparkMLlibPipeline example") { + SparkMLlibPipeline.run(spark, iris, "target/native-model", + "target/pipeline-model", "auto", 2) + } + + test("Smoke test for SparkTraining example") { + val spark = SparkSession + .builder() + .appName("XGBoost4J-Spark Pipeline Example") + .master(s"local[${numWorkers}]") + .config("spark.ui.enabled", value =
false) + .config("spark.driver.memory", "512m") + .config("spark.barrier.sync.timeout", 10) + .config("spark.task.cpus", 1) + .getOrCreate() + + SparkTraining.run(spark, iris, "auto", 2) + } +} diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py index c0945075592e..b9c274c28a4d 100644 --- a/jvm-packages/xgboost4j-tester/generate_pom.py +++ b/jvm-packages/xgboost4j-tester/generate_pom.py @@ -8,7 +8,7 @@ 4.0.0 ml.dmlc - xgboost4j-tester_${scala.binary.version} + xgboost4j-tester_{scala_binary_version} 1.0-SNAPSHOT xgboost4j-tester @@ -17,16 +17,19 @@ UTF-8 {maven_compiler_source} {maven_compiler_target} + 4.13.2 {spark_version} {scala_version} + 3.2.15 {scala_binary_version} + 5.5.0 - + com.esotericsoftware kryo - 4.0.2 + ${{kryo.version}} org.scala-lang @@ -48,29 +51,12 @@ commons-logging 1.2 - - com.typesafe.akka - akka-testkit_${{scala.binary.version}} - 2.6.20 - test - org.scalatest scalatest_${{scala.binary.version}} - 3.0.8 - test - - - org.scalactic - scalactic_${{scala.binary.version}} - 3.2.15 + ${{scalatest.version}} test - - org.apache.commons - commons-lang3 - 3.9 - org.apache.spark spark-core_${{scala.binary.version}} @@ -92,7 +78,7 @@ junit junit - 4.13.2 + ${{junit.version}} test @@ -122,36 +108,9 @@ - - - maven-clean-plugin - 3.1.0 - - - - maven-resources-plugin - 3.0.2 - - - maven-compiler-plugin - 3.8.0 - - - maven-jar-plugin - 3.0.2 - - - maven-install-plugin - 2.5.2 - - - maven-deploy-plugin - 2.8.2 - org.apache.maven.plugins maven-assembly-plugin - 2.4 jar-with-dependencies @@ -171,22 +130,12 @@ - - - maven-site-plugin - 3.7.1 - - - maven-project-info-reports-plugin - 3.0.0 - org.apache.maven.plugins maven-surefire-plugin - 2.22.1 - ml.dmlc:xgboost4j_2.12 + ml.dmlc:xgboost4j_${{scala.binary.version}} diff --git a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java b/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java deleted file 
mode 100644 index 2df69374806a..000000000000 --- a/jvm-packages/xgboost4j-tester/src/test/java/ml/dmlc/xgboost4j/tester/AppTest.java +++ /dev/null @@ -1,20 +0,0 @@ -package ml.dmlc.xgboost4j.tester; - -import static org.junit.Assert.assertTrue; - -import org.junit.Test; - -/** - * Unit test for simple App. - */ -public class AppTest -{ - /** - * Rigorous Test :-) - */ - @Test - public void shouldAnswerWithTrue() - { - assertTrue( true ); - } -} diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh index 1de43bbd0c6e..33cfffe713bc 100755 --- a/tests/buildkite/build-jvm-packages.sh +++ b/tests/buildkite/build-jvm-packages.sh @@ -4,11 +4,18 @@ set -euo pipefail source tests/buildkite/conftest.sh -echo "--- Build XGBoost JVM packages" +echo "--- Build XGBoost JVM packages scala 2.12" tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} + +echo "--- Build XGBoost JVM packages scala 2.13" + +tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ + ${SPARK_VERSION} "" "" "true" + echo "--- Stash XGBoost4J JARs" buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar" diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index cf9270c1162c..957dd443c5ee 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -25,7 +25,7 @@ set -x CUDA_VERSION=11.8.0 NCCL_VERSION=2.16.5-1 RAPIDS_VERSION=23.02 -SPARK_VERSION=3.1.1 +SPARK_VERSION=3.4.0 JDK_VERSION=8 if [[ -z ${BUILDKITE:-} ]] diff --git a/tests/ci_build/Dockerfile.jvm_cross b/tests/ci_build/Dockerfile.jvm_cross index 6d9c5c57f294..fdfae310aac5 100644 --- a/tests/ci_build/Dockerfile.jvm_cross +++ b/tests/ci_build/Dockerfile.jvm_cross @@ -20,10 +20,14 
@@ RUN \ wget -nv https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ ln -s /opt/apache-maven-3.6.1/ /opt/maven && \ - # Spark - wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \ - tar xvf spark-$SPARK_VERSION-bin-hadoop2.7.tgz -C /opt && \ - ln -s /opt/spark-$SPARK_VERSION-bin-hadoop2.7 /opt/spark + # Spark with scala 2.12 + mkdir -p /opt/spark-scala-2.12 && \ + wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz && \ + tar xvf spark-$SPARK_VERSION-bin-hadoop3.tgz --strip-components=1 -C /opt/spark-scala-2.12 && \ + # Spark with scala 2.13 + mkdir -p /opt/spark-scala-2.13 && \ + wget -nv https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz && \ + tar xvf spark-$SPARK_VERSION-bin-hadoop3-scala2.13.tgz --strip-components=1 -C /opt/spark-scala-2.13 ENV PATH=/opt/mambaforge/bin:/opt/spark/bin:/opt/maven/bin:$PATH diff --git a/tests/ci_build/test_jvm_cross.sh b/tests/ci_build/test_jvm_cross.sh index 378846d651db..18265cf015d3 100755 --- a/tests/ci_build/test_jvm_cross.sh +++ b/tests/ci_build/test_jvm_cross.sh @@ -6,37 +6,56 @@ set -x # Initialize local Maven repository ./tests/ci_build/initialize_maven.sh -# Get version number of XGBoost4J and other auxiliary information cd jvm-packages +jvm_packages_dir=`pwd` +# Get version number of XGBoost4J and other auxiliary information xgboost4j_version=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) maven_compiler_source=$(mvn help:evaluate -Dexpression=maven.compiler.source -q -DforceStdout) maven_compiler_target=$(mvn help:evaluate -Dexpression=maven.compiler.target -q -DforceStdout) spark_version=$(mvn help:evaluate -Dexpression=spark.version -q -DforceStdout) -scala_version=$(mvn help:evaluate -Dexpression=scala.version -q -DforceStdout) 
-scala_binary_version=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout) - -# Install XGBoost4J JAR into local Maven repository -mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar -mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests -mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar -mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar - -cd xgboost4j-tester -# Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests -python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version} -# Run unit tests with XGBoost4J -mvn --no-transfer-progress package - -# Run integration tests with XGBoost4J -java -jar ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar - -# Run integration tests with XGBoost4J-Spark -if [ ! -z "$RUN_INTEGRATION_TEST" ] -then + +if [ ! 
-z "$RUN_INTEGRATION_TEST" ]; then + cd $jvm_packages_dir/xgboost4j-tester python3 get_iris.py - spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv - spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ./target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar ${PWD}/iris.csv ${PWD}/native_model ${PWD}/pipeline_model + cd $jvm_packages_dir fi +# including maven profiles for different scala versions: 2.12 is the default at the moment. +for _maven_profile_string in "" "-Pdefault,scala-2.13"; do + scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout) + scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout) + + # Install XGBoost4J JAR into local Maven repository + mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar + mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}-tests.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=test-jar -Dclassifier=tests + mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-spark/target/xgboost4j-spark_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j-spark_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar + mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j-example/target/xgboost4j-example_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc 
-DartifactId=xgboost4j-example_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar + + cd xgboost4j-tester + # Generate pom.xml for XGBoost4J-tester, a dummy project to run XGBoost4J tests + python3 ./generate_pom.py ${xgboost4j_version} ${maven_compiler_source} ${maven_compiler_target} ${spark_version} ${scala_version} ${scala_binary_version} + # Build package and unit tests with XGBoost4J + mvn --no-transfer-progress clean package + xgboost4j_tester_jar="$jvm_packages_dir/xgboost4j-tester/target/xgboost4j-tester_${scala_binary_version}-1.0-SNAPSHOT-jar-with-dependencies.jar" + # Run integration tests with XGBoost4J + java -jar $xgboost4j_tester_jar + + # Run integration tests with XGBoost4J-Spark + if [ ! -z "$RUN_INTEGRATION_TEST" ]; then + # Changing directory so that we do not mix code and resulting files + cd target + if [[ "$scala_binary_version" == "2.12" ]]; then + /opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv + /opt/spark-scala-2.12/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version} + elif [[ "$scala_binary_version" == "2.13" ]]; then + /opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkTraining --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv + /opt/spark-scala-2.13/bin/spark-submit --class ml.dmlc.xgboost4j.scala.example.spark.SparkMLlibPipeline --master 'local[8]' ${xgboost4j_tester_jar} $jvm_packages_dir/xgboost4j-tester/iris.csv ${PWD}/native_model-${scala_version} ${PWD}/pipeline_model-${scala_version} + else + echo "Unexpected scala version: $scala_version ($scala_binary_version)." 
+ # Abort here: carrying on would skip the Spark integration tests and still report success. + exit 1 + fi + fi + cd $jvm_packages_dir +done + set +x set +e