From 7bd9d041f41aaf96816295cafce86cdfdc6a4e09 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Thu, 14 Dec 2023 04:31:06 +0000 Subject: [PATCH 1/2] [jvm-packages] Fix POM for xgboost-jvm metapackage --- dev/prepare_jvm_release.py | 24 ++++++--- jvm-packages/create_jni.py | 65 +++++++++++++++--------- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 2 +- jvm-packages/xgboost4j-flink/pom.xml | 2 +- jvm-packages/xgboost4j-gpu/pom.xml | 2 +- jvm-packages/xgboost4j-spark-gpu/pom.xml | 2 +- jvm-packages/xgboost4j-spark/pom.xml | 2 +- jvm-packages/xgboost4j/pom.xml | 2 +- 9 files changed, 64 insertions(+), 39 deletions(-) diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py index 49dffdd0aa5f..1050a51c7265 100644 --- a/dev/prepare_jvm_release.py +++ b/dev/prepare_jvm_release.py @@ -2,7 +2,6 @@ import errno import glob import os -import platform import re import shutil import subprocess @@ -88,10 +87,6 @@ def main(): help="Version of the release being prepared", ) args = parser.parse_args() - - if sys.platform != "darwin" or platform.machine() != "arm64": - raise NotImplementedError("Please run this script using an M1 Mac") - version = args.release_version expected_git_tag = "v" + version current_git_tag = get_current_git_tag() @@ -141,6 +136,7 @@ def main(): ("linux", "x86_64"), ("windows", "x86_64"), ("macos", "x86_64"), + ("macos", "aarch64"), ]: output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}" maybe_makedirs(output_dir) @@ -164,6 +160,10 @@ def main(): url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib", filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib", ) + retrieve( + url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib", + filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib", + ) with tempfile.TemporaryDirectory() as tempdir: # libxgboost4j.so for Linux x86_64, CPU only @@ -211,9 +211,14 @@ def main(): "https://central.sonatype.org/publish/publish-maven/" ) print( - "3. Now on a M1 Mac machine, run the following to build Scala 2.12 artifacts:" + "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts." + "Make sure to use an Internet connection with fast upload speed:" + ) + print( + " # Skip native build, since we have all needed native binaries from CI" + " export MAVEN_SKIP_NATIVE_BUILD=1" + " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests" ) - print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests") print( "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging " "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx " @@ -222,7 +227,10 @@ def main(): "named xgboost-jvm_2.12." ) print("5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:") - print(" rm -rf targets/") + print( + " find . -name target -exec rm -rv {} +" + " find . -name pom.xml -exec sed -i 's/xgboost-jvm_2.12/xgboost-jvm_2.13/g' {} +" + ) print(" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests") print( "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts." diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index 18908fc1c0d5..4d655bfd3158 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import errno import argparse +import errno import glob import os import platform @@ -19,11 +19,10 @@ "USE_HDFS": "OFF", "USE_AZURE": "OFF", "USE_S3": "OFF", - "USE_CUDA": "OFF", "USE_NCCL": "OFF", "JVM_BINDINGS": "ON", - "LOG_CAPI_INVOCATION": "OFF" + "LOG_CAPI_INVOCATION": "OFF", } @@ -70,26 +69,22 @@ def normpath(path): return normalized -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF') - parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF') - cli_args = parser.parse_args() - +def native_build(args): if sys.platform == "darwin": # Enable of your compiler supports OpenMP. CONFIG["USE_OPENMP"] = "OFF" - os.environ["JAVA_HOME"] = subprocess.check_output( - "/usr/libexec/java_home").strip().decode() + os.environ["JAVA_HOME"] = ( + subprocess.check_output("/usr/libexec/java_home").strip().decode() + ) print("building Java wrapper") with cd(".."): - build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' else 'build' + build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build" maybe_makedirs(build_dir) with cd(build_dir): if sys.platform == "win32": # Force x64 build on Windows. - maybe_generator = ' -A x64' + maybe_generator = " -A x64" else: maybe_generator = "" if sys.platform == "linux": @@ -97,12 +92,12 @@ def normpath(path): else: maybe_parallel_build = "" - if cli_args.log_capi_invocation == 'ON': - CONFIG['LOG_CAPI_INVOCATION'] = 'ON' + if cli_args.log_capi_invocation == "ON": + CONFIG["LOG_CAPI_INVOCATION"] = "ON" - if cli_args.use_cuda == 'ON': - CONFIG['USE_CUDA'] = 'ON' - CONFIG['USE_NCCL'] = 'ON' + if cli_args.use_cuda == "ON": + CONFIG["USE_CUDA"] = "ON" + CONFIG["USE_NCCL"] = "ON" args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()] @@ -115,7 +110,7 @@ def normpath(path): if gpu_arch_flag is not None: args.append("%s" % gpu_arch_flag) - lib_dir = os.path.join(os.pardir, 'lib') + lib_dir = os.path.join(os.pardir, "lib") if os.path.exists(lib_dir): shutil.rmtree(lib_dir) run("cmake .. " + " ".join(args) + maybe_generator) @@ -125,8 +120,10 @@ def normpath(path): run(f'"{sys.executable}" mapfeat.py') run(f'"{sys.executable}" mknfold.py machine.txt 1') - xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j' - xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j-spark' + xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" else "xgboost4j" + xgboost4j_spark = ( + "xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" else "xgboost4j-spark" + ) print("copying native library") library_name, os_folder = { @@ -141,14 +138,19 @@ def normpath(path): "i86pc": "x86_64", # on Solaris x86_64 "sun4v": "sparc", # on Solaris sparc "arm64": "aarch64", # on macOS & Windows ARM 64-bit - "aarch64": "aarch64" + "aarch64": "aarch64", }[platform.machine().lower()] - output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder) + output_folder = "{}/src/main/resources/lib/{}/{}".format( + xgboost4j, os_folder, arch_folder + ) maybe_makedirs(output_folder) cp("../lib/" + library_name, output_folder) print("copying pure-Python tracker") - cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j)) + cp( + "../python-package/xgboost/tracker.py", + "{}/src/main/resources".format(xgboost4j), + ) print("copying train/test files") maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark)) @@ -164,3 +166,18 @@ def normpath(path): maybe_makedirs("{}/src/test/resources".format(xgboost4j)) for file in glob.glob("../demo/data/agaricus.*"): cp(file, "{}/src/test/resources".format(xgboost4j)) + + +if __name__ == "__main__": + if "MAVEN_SKIP_NATIVE_BUILD" in os.environ: + print("MAVEN_SKIP_NATIVE_BUILD is set. Skipping native build...") + else: + parser = argparse.ArgumentParser() + parser.add_argument( + "--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF" + ) + parser.add_argument( + "--use-cuda", type=str, choices=["ON", "OFF"], default="OFF" + ) + cli_args = parser.parse_args() + native_build(cli_args) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 1eb6b9f02983..f13cff51da21 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} + xgboost-jvm_2.12 2.0.2 pom XGBoost JVM Package diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index c77f046dcc50..6aa10430f34a 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} + xgboost-jvm_2.12 2.0.2 xgboost4j-example diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index a9a6644a0b63..79a30fde35c2 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} + xgboost-jvm_2.12 2.0.2 diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 056b1f7fa835..c50013f95cbf 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} + xgboost-jvm_2.12 2.0.2 xgboost4j-gpu_${scala.binary.version} diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 9aa810c8cb77..fb3aed8ac9be 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} + xgboost-jvm_2.12 2.0.2 xgboost4j-spark-gpu diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index e5dc92399cd6..71d5127d88ce 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} + xgboost-jvm_2.12 2.0.2 xgboost4j-spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 3da9a9ce68f2..95c9e0bc2c0f 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -5,7 +5,7 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} + xgboost-jvm_2.12 2.0.2 xgboost4j From 3378285c46cf7854583f32aab0a897e655c98c6d Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Fri, 15 Dec 2023 00:48:38 +0000 Subject: [PATCH 2/2] Add script for updating the Scala version --- dev/change_scala_version.py | 45 +++++++++++++++++++++++++++++++++++++ dev/prepare_jvm_release.py | 16 ++++++------- 2 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 dev/change_scala_version.py diff --git a/dev/change_scala_version.py b/dev/change_scala_version.py new file mode 100644 index 000000000000..ba385521f9c3 --- /dev/null +++ b/dev/change_scala_version.py @@ -0,0 +1,45 @@ +import argparse +import pathlib +import re +import shutil + +try: + import sh +except ImportError as e: + raise ImportError( + "Please install sh in your Python environment.\n" + " - Pip: pip install sh\n" + " - Conda: conda install -c conda-forge sh" + ) from e + + +def main(args): + # Clean artifacts + for target in pathlib.Path("jvm-packages/").glob("**/target"): + if target.is_dir(): + print(f"Removing {target}...") + shutil.rmtree(target) + + # Update pom.xml + for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"): + print(f"Updating {pom}...") + sh.sed( + [ + "-i", + f"s/xgboost-jvm_[0-9\\.]*/xgboost-jvm_{args.scala_version}/g", + str(pom), + ] + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--scala-version", + type=str, + required=True, + help="Version of Scala to use in the JVM packages", + choices=["2.12", "2.13"], + ) + parsed_args = parser.parse_args() + main(parsed_args) diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py index 1050a51c7265..99960890ab14 100644 --- a/dev/prepare_jvm_release.py +++ b/dev/prepare_jvm_release.py @@ -211,12 +211,12 @@ def main(): "https://central.sonatype.org/publish/publish-maven/" ) print( - "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts." + "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. " "Make sure to use an Internet connection with fast upload speed:" ) print( - " # Skip native build, since we have all needed native binaries from CI" - " export MAVEN_SKIP_NATIVE_BUILD=1" + " # Skip native build, since we have all needed native binaries from CI\n" + " export MAVEN_SKIP_NATIVE_BUILD=1\n" " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests" ) print( @@ -226,14 +226,14 @@ def main(): "artifacts to the Maven Central repository. The top-level metapackage should be " "named xgboost-jvm_2.12." ) - print("5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:") print( - " find . -name target -exec rm -rv {} +" - " find . -name pom.xml -exec sed -i 's/xgboost-jvm_2.12/xgboost-jvm_2.13/g' {} +" + "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n" + " export MAVEN_SKIP_NATIVE_BUILD=1\n" + " python dev/change_scala_version.py --scala-version 2.13\n" + " GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests" ) - print(" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests") print( - "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts." + "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. " "The top-level metapackage should be named xgboost-jvm_2.13." )