diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b1b81989323..c838ff9ef299 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR) -project(xgboost LANGUAGES CXX C VERSION 2.0.2) +project(xgboost LANGUAGES CXX C VERSION 2.0.3) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") cmake_policy(SET CMP0022 NEW) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 5f609c7e2a1a..7a8e951e0df5 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 2.0.2.1 -Date: 2023-10-12 +Version: 2.0.3.1 +Date: 2023-12-14 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/configure b/R-package/configure index d46acbf14b95..cc5a3a44188e 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for xgboost 2.0.2. +# Generated by GNU Autoconf 2.71 for xgboost 2.0.3. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='2.0.2' -PACKAGE_STRING='xgboost 2.0.2' +PACKAGE_VERSION='2.0.3' +PACKAGE_STRING='xgboost 2.0.3' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 2.0.2 to adapt to many kinds of systems. +\`configure' configures xgboost 2.0.3 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1287,7 +1287,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 2.0.2:";; + short | recursive ) echo "Configuration of xgboost 2.0.3:";; esac cat <<\_ACEOF @@ -1367,7 +1367,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 2.0.2 +xgboost configure 2.0.3 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 2.0.2, which was +It was created by xgboost $as_me 2.0.3, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 2.0.2, which was +This file was extended by xgboost $as_me 2.0.3, which was generated by GNU Autoconf 2.71. 
Invocation command line was

   CONFIG_FILES    = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-xgboost config.status 2.0.2
+xgboost config.status 2.0.3
 configured by $0, generated by GNU Autoconf 2.71,
   with options \\"\$ac_cs_config\\"
diff --git a/R-package/configure.ac b/R-package/configure.ac
index f4cef3c88b3a..806dd20c784b 100644
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@

 AC_PREREQ(2.69)

-AC_INIT([xgboost],[2.0.2],[],[xgboost],[])
+AC_INIT([xgboost],[2.0.3],[],[xgboost],[])

 : ${R_HOME=`R RHOME`}
 if test -z "${R_HOME}"; then
diff --git a/dev/change_scala_version.py b/dev/change_scala_version.py
new file mode 100644
index 000000000000..d9438f76adf7
--- /dev/null
+++ b/dev/change_scala_version.py
@@ -0,0 +1,79 @@
+import argparse
+import pathlib
+import re
+import shutil
+
+
+def main(args):
+    if args.scala_version == "2.12":
+        scala_ver = "2.12"
+        scala_patchver = "2.12.18"
+    elif args.scala_version == "2.13":
+        scala_ver = "2.13"
+        scala_patchver = "2.13.11"
+    else:
+        raise ValueError(f"Unsupported Scala version: {args.scala_version}")
+
+    # Clean artifacts
+    if args.purge_artifacts:
+        for target in pathlib.Path("jvm-packages/").glob("**/target"):
+            if target.is_dir():
+                print(f"Removing {target}...")
+                shutil.rmtree(target)
+
+    # Update pom.xml
+    for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"):
+        print(f"Updating {pom}...")
+        with open(pom, "r", encoding="utf-8") as f:
+            lines = f.readlines()
+        with open(pom, "w", encoding="utf-8") as f:
+            replaced_scalaver = False
+            replaced_scala_binver = False
+            for line in lines:
+                for artifact in [
+                    "xgboost-jvm",
+                    "xgboost4j",
+                    "xgboost4j-gpu",
+                    "xgboost4j-spark",
+                    "xgboost4j-spark-gpu",
+                    "xgboost4j-flink",
+                    "xgboost4j-example",
+                ]:
+                    line = re.sub(
+                        f"<artifactId>{artifact}_[0-9\\.]*",
+                        f"<artifactId>{artifact}_{scala_ver}",
+                        line,
+                    )
+                # Only replace the first occurrence of scala.version
+                if not replaced_scalaver:
+                    line, nsubs = re.subn(
+                        r"<scala.version>[0-9\.]*</scala.version>",
+                        f"<scala.version>{scala_patchver}</scala.version>",
+                        line,
+                    )
+                    if nsubs > 0:
+                        replaced_scalaver = True
+                # Only replace the first occurrence of scala.binary.version
+                if not replaced_scala_binver:
+                    line, nsubs = re.subn(
+                        r"<scala.binary.version>[0-9\.]*</scala.binary.version>",
+                        f"<scala.binary.version>{scala_ver}</scala.binary.version>",
+                        line,
+                    )
+                    if nsubs > 0:
+                        replaced_scala_binver = True
+                f.write(line)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--purge-artifacts", action="store_true")
+    parser.add_argument(
+        "--scala-version",
+        type=str,
+        required=True,
+        help="Version of Scala to use in the JVM packages",
+        choices=["2.12", "2.13"],
+    )
+    parsed_args = parser.parse_args()
+    main(parsed_args)
diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py
index 49dffdd0aa5f..361e198100d0 100644
--- a/dev/prepare_jvm_release.py
+++ b/dev/prepare_jvm_release.py
@@ -2,7 +2,6 @@
 import errno
 import glob
 import os
-import platform
 import re
 import shutil
 import subprocess
@@ -88,10 +87,6 @@ def main():
         help="Version of the release being prepared",
     )
     args = parser.parse_args()
-
-    if sys.platform != "darwin" or platform.machine() != "arm64":
-        raise NotImplementedError("Please run this script using an M1 Mac")
-
     version = args.release_version
     expected_git_tag = "v" + version
     current_git_tag = get_current_git_tag()
@@ -141,6 +136,7 @@ def main():
         ("linux", "x86_64"),
         ("windows", "x86_64"),
         ("macos", "x86_64"),
+        ("macos", "aarch64"),
     ]:
output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}" maybe_makedirs(output_dir) @@ -164,6 +160,10 @@ def main(): url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib", filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib", ) + retrieve( + url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib", + filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib", + ) with tempfile.TemporaryDirectory() as tempdir: # libxgboost4j.so for Linux x86_64, CPU only @@ -211,9 +211,14 @@ def main(): "https://central.sonatype.org/publish/publish-maven/" ) print( - "3. Now on a M1 Mac machine, run the following to build Scala 2.12 artifacts:" + "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. " + "Make sure to use an Internet connection with fast upload speed:" + ) + print( + " # Skip native build, since we have all needed native binaries from CI\n" + " export MAVEN_SKIP_NATIVE_BUILD=1\n" + " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests" ) - print(" GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests") print( "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging " "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx " @@ -221,11 +226,14 @@ def main(): "artifacts to the Maven Central repository. The top-level metapackage should be " "named xgboost-jvm_2.12." ) - print("5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:") - print(" rm -rf targets/") - print(" GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests") print( - "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts." + "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n" + " export MAVEN_SKIP_NATIVE_BUILD=1\n" + " python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts\n" + " GPG_TTY=$(tty) mvn deploy -Prelease-cpu-only,scala-2.13 -DskipTests" + ) + print( + "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. " "The top-level metapackage should be named xgboost-jvm_2.13." ) diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index a69dcfab9064..999fc845389e 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 2 /* NOLINT */ #define XGBOOST_VER_MINOR 0 /* NOLINT */ -#define XGBOOST_VER_PATCH 2 /* NOLINT */ +#define XGBOOST_VER_PATCH 3 /* NOLINT */ #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index 18908fc1c0d5..4d655bfd3158 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import errno import argparse +import errno import glob import os import platform @@ -19,11 +19,10 @@ "USE_HDFS": "OFF", "USE_AZURE": "OFF", "USE_S3": "OFF", - "USE_CUDA": "OFF", "USE_NCCL": "OFF", "JVM_BINDINGS": "ON", - "LOG_CAPI_INVOCATION": "OFF" + "LOG_CAPI_INVOCATION": "OFF", } @@ -70,26 +69,22 @@ def normpath(path): return normalized -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--log-capi-invocation', type=str, choices=['ON', 'OFF'], default='OFF') - parser.add_argument('--use-cuda', type=str, choices=['ON', 'OFF'], default='OFF') - cli_args = parser.parse_args() - +def native_build(args): if sys.platform == "darwin": # Enable of your compiler supports OpenMP. 
CONFIG["USE_OPENMP"] = "OFF" - os.environ["JAVA_HOME"] = subprocess.check_output( - "/usr/libexec/java_home").strip().decode() + os.environ["JAVA_HOME"] = ( + subprocess.check_output("/usr/libexec/java_home").strip().decode() + ) print("building Java wrapper") with cd(".."): - build_dir = 'build-gpu' if cli_args.use_cuda == 'ON' else 'build' + build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build" maybe_makedirs(build_dir) with cd(build_dir): if sys.platform == "win32": # Force x64 build on Windows. - maybe_generator = ' -A x64' + maybe_generator = " -A x64" else: maybe_generator = "" if sys.platform == "linux": @@ -97,12 +92,12 @@ def normpath(path): else: maybe_parallel_build = "" - if cli_args.log_capi_invocation == 'ON': - CONFIG['LOG_CAPI_INVOCATION'] = 'ON' + if cli_args.log_capi_invocation == "ON": + CONFIG["LOG_CAPI_INVOCATION"] = "ON" - if cli_args.use_cuda == 'ON': - CONFIG['USE_CUDA'] = 'ON' - CONFIG['USE_NCCL'] = 'ON' + if cli_args.use_cuda == "ON": + CONFIG["USE_CUDA"] = "ON" + CONFIG["USE_NCCL"] = "ON" args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()] @@ -115,7 +110,7 @@ def normpath(path): if gpu_arch_flag is not None: args.append("%s" % gpu_arch_flag) - lib_dir = os.path.join(os.pardir, 'lib') + lib_dir = os.path.join(os.pardir, "lib") if os.path.exists(lib_dir): shutil.rmtree(lib_dir) run("cmake .. " + " ".join(args) + maybe_generator) @@ -125,8 +120,10 @@ def normpath(path): run(f'"{sys.executable}" mapfeat.py') run(f'"{sys.executable}" mknfold.py machine.txt 1') - xgboost4j = 'xgboost4j-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j' - xgboost4j_spark = 'xgboost4j-spark-gpu' if cli_args.use_cuda == 'ON' else 'xgboost4j-spark' + xgboost4j = "xgboost4j-gpu" if cli_args.use_cuda == "ON" else "xgboost4j" + xgboost4j_spark = ( + "xgboost4j-spark-gpu" if cli_args.use_cuda == "ON" else "xgboost4j-spark" + ) print("copying native library") library_name, os_folder = { @@ -141,14 +138,19 @@ def normpath(path): "i86pc": "x86_64", # on Solaris x86_64 "sun4v": "sparc", # on Solaris sparc "arm64": "aarch64", # on macOS & Windows ARM 64-bit - "aarch64": "aarch64" + "aarch64": "aarch64", }[platform.machine().lower()] - output_folder = "{}/src/main/resources/lib/{}/{}".format(xgboost4j, os_folder, arch_folder) + output_folder = "{}/src/main/resources/lib/{}/{}".format( + xgboost4j, os_folder, arch_folder + ) maybe_makedirs(output_folder) cp("../lib/" + library_name, output_folder) print("copying pure-Python tracker") - cp("../python-package/xgboost/tracker.py", "{}/src/main/resources".format(xgboost4j)) + cp( + "../python-package/xgboost/tracker.py", + "{}/src/main/resources".format(xgboost4j), + ) print("copying train/test files") maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark)) @@ -164,3 +166,18 @@ def normpath(path): maybe_makedirs("{}/src/test/resources".format(xgboost4j)) for file in glob.glob("../demo/data/agaricus.*"): cp(file, "{}/src/test/resources".format(xgboost4j)) + + +if __name__ == "__main__": + if "MAVEN_SKIP_NATIVE_BUILD" in os.environ: + print("MAVEN_SKIP_NATIVE_BUILD is set. 
Skipping native build...") + else: + parser = argparse.ArgumentParser() + parser.add_argument( + "--log-capi-invocation", type=str, choices=["ON", "OFF"], default="OFF" + ) + parser.add_argument( + "--use-cuda", type=str, choices=["ON", "OFF"], default="OFF" + ) + cli_args = parser.parse_args() + native_build(cli_args) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 1eb6b9f02983..b57b569db926 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -5,8 +5,8 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} - 2.0.2 + xgboost-jvm_2.12 + 2.0.3 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index c77f046dcc50..d8a14825faf9 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -5,12 +5,12 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} - 2.0.2 + xgboost-jvm_2.12 + 2.0.3 xgboost4j-example - xgboost4j-example_${scala.binary.version} - 2.0.2 + xgboost4j-example_2.12 + 2.0.3 jar @@ -26,7 +26,7 @@ ml.dmlc - xgboost4j-spark_${scala.binary.version} + xgboost4j-spark_2.12 ${project.version} @@ -37,7 +37,7 @@ ml.dmlc - xgboost4j-flink_${scala.binary.version} + xgboost4j-flink_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index a9a6644a0b63..a0fce50c79c2 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -5,13 +5,13 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} - 2.0.2 + xgboost-jvm_2.12 + 2.0.3 xgboost4j-flink - xgboost4j-flink_${scala.binary.version} - 2.0.2 + xgboost4j-flink_2.12 + 2.0.3 2.2.0 @@ -30,7 +30,7 @@ ml.dmlc - xgboost4j_${scala.binary.version} + xgboost4j_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 056b1f7fa835..d4067df48bd9 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -5,12 +5,12 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} - 2.0.2 + xgboost-jvm_2.12 + 2.0.3 - xgboost4j-gpu_${scala.binary.version} + xgboost4j-gpu_2.12 xgboost4j-gpu - 2.0.2 + 2.0.3 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 9aa810c8cb77..d64cd81ce4d7 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} - 2.0.2 + xgboost-jvm_2.12 + 2.0.3 xgboost4j-spark-gpu - xgboost4j-spark-gpu_${scala.binary.version} + xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc - xgboost4j-gpu_${scala.binary.version} + xgboost4j-gpu_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index e5dc92399cd6..502eb7ad7143 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -5,11 +5,11 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} - 2.0.2 + xgboost-jvm_2.12 + 2.0.3 xgboost4j-spark - xgboost4j-spark_${scala.binary.version} + xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc - xgboost4j_${scala.binary.version} + xgboost4j_2.12 ${project.version} diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 3da9a9ce68f2..72b02a65abad 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -5,12 +5,12 @@ 4.0.0 ml.dmlc - xgboost-jvm_${scala.binary.version} - 2.0.2 + xgboost-jvm_2.12 + 2.0.3 xgboost4j - 
xgboost4j_${scala.binary.version} - 2.0.2 + xgboost4j_2.12 + 2.0.3 jar diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index 4434424882ba..90f127bc1afd 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "packager.pep517" [project] name = "xgboost" -version = "2.0.2" +version = "2.0.3" authors = [ { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index e9307ca5751b..50ffc5aa7f69 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -2.0.2 +2.0.3 diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 53381f5e2de3..2910dee2d587 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -206,6 +206,7 @@ def _load_lib() -> ctypes.CDLL: lib = ctypes.cdll.LoadLibrary(lib_path) setattr(lib, "path", os.path.normpath(lib_path)) lib_success = True + break except OSError as e: os_error_list.append(str(e)) continue diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 219ad2698a5a..2b1d692d3848 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -78,7 +78,6 @@ from .sklearn import ( XGBClassifier, XGBClassifierBase, - XGBClassifierMixIn, XGBModel, XGBRanker, XGBRankerMixIn, @@ -1854,7 +1853,7 @@ def fit( "Implementation of the scikit-learn API for XGBoost classification.", ["estimators", "model"], ) -class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierMixIn, XGBClassifierBase): +class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): # pylint: disable=missing-class-docstring async def _fit_async( self, @@ -2036,10 +2035,6 @@ def _argmax(x: Any) -> Any: preds = da.map_blocks(_argmax, pred_probs, drop_axis=1) return preds - def load_model(self, fname: ModelIn) -> None: - super().load_model(fname) - self._load_model_attributes(self.get_booster()) - @xgboost_model_doc( """Implementation of the Scikit-Learn API for XGBoost Ranking. diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index fab5cfdaf834..0f1ed1417825 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -43,19 +43,6 @@ from .training import train -class XGBClassifierMixIn: # pylint: disable=too-few-public-methods - """MixIn for classification.""" - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - - def _load_model_attributes(self, booster: Booster) -> None: - config = json.loads(booster.save_config()) - self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"]) - # binary classification is treated as regression in XGBoost. - self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_ - - class XGBRankerMixIn: # pylint: disable=too-few-public-methods """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn base classes. @@ -845,21 +832,38 @@ def load_model(self, fname: ModelIn) -> None: self.get_booster().load_model(fname) meta_str = self.get_booster().attr("scikit_learn") - if meta_str is None: - return + if meta_str is not None: + meta = json.loads(meta_str) + t = meta.get("_estimator_type", None) + if t is not None and t != self._get_type(): + raise TypeError( + "Loading an estimator with different type. 
Expecting: " + f"{self._get_type()}, got: {t}" + ) - meta = json.loads(meta_str) - t = meta.get("_estimator_type", None) - if t is not None and t != self._get_type(): - raise TypeError( - "Loading an estimator with different type. Expecting: " - f"{self._get_type()}, got: {t}" - ) self.feature_types = self.get_booster().feature_types self.get_booster().set_attr(scikit_learn=None) + config = json.loads(self.get_booster().save_config()) + self._load_model_attributes(config) load_model.__doc__ = f"""{Booster.load_model.__doc__}""" + def _load_model_attributes(self, config: dict) -> None: + """Load model attributes without hyper-parameters.""" + from sklearn.base import is_classifier + + booster = self.get_booster() + + self.objective = config["learner"]["objective"]["name"] + self.booster = config["learner"]["gradient_booster"]["name"] + self.base_score = config["learner"]["learner_model_param"]["base_score"] + self.feature_types = booster.feature_types + + if is_classifier(self): + self.n_classes_ = int(config["learner"]["learner_model_param"]["num_class"]) + # binary classification is treated as regression in XGBoost. + self.n_classes_ = 2 if self.n_classes_ < 2 else self.n_classes_ + # pylint: disable=too-many-branches def _configure_fit( self, @@ -1409,7 +1413,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> Number of boosting rounds. """, ) -class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase): +class XGBClassifier(XGBModel, XGBClassifierBase): # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes @_deprecate_positional_args def __init__( @@ -1637,10 +1641,6 @@ def predict_proba( def classes_(self) -> np.ndarray: return np.arange(self.n_classes_) - def load_model(self, fname: ModelIn) -> None: - super().load_model(fname) - self._load_model_attributes(self.get_booster()) - @xgboost_model_doc( "scikit-learn API for XGBoost random forest classification.", diff --git a/tests/buildkite/build-jvm-packages.sh b/tests/buildkite/build-jvm-packages.sh index 33cfffe713bc..b36d2ae594c5 100755 --- a/tests/buildkite/build-jvm-packages.sh +++ b/tests/buildkite/build-jvm-packages.sh @@ -8,13 +8,18 @@ echo "--- Build XGBoost JVM packages scala 2.12" tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} +echo "--- Stash XGBoost4J JARs (Scala 2.12)" +buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar" +buildkite-agent artifact upload "jvm-packages/xgboost4j-example/target/*.jar" echo "--- Build XGBoost JVM packages scala 2.13" tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} "" "" "true" -echo "--- Stash XGBoost4J JARs" +echo "--- Stash XGBoost4J JARs (Scala 2.13)" buildkite-agent artifact upload "jvm-packages/xgboost4j/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-spark/target/*.jar" buildkite-agent artifact upload "jvm-packages/xgboost4j-flink/target/*.jar" diff --git a/tests/buildkite/pipeline-mac-m1.yml b/tests/buildkite/pipeline-mac-m1.yml new file mode 100644 index 000000000000..7e4a664acf6b --- /dev/null +++ b/tests/buildkite/pipeline-mac-m1.yml @@ -0,0 +1,8 @@ +steps: + - block: ":rocket: Run this test job" + if: build.pull_request.id != null || build.branch =~ /^dependabot\// + - label: ":macos: Build and Test XGBoost for MacOS M1 with Clang 11" + 
command: "tests/buildkite/test-macos-m1-clang11.sh" + key: mac-m1-appleclang11 + agents: + queue: mac-mini-m1 diff --git a/tests/buildkite/test-macos-m1-clang11.sh b/tests/buildkite/test-macos-m1-clang11.sh new file mode 100755 index 000000000000..401701b42223 --- /dev/null +++ b/tests/buildkite/test-macos-m1-clang11.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +set -euo pipefail + +source tests/buildkite/conftest.sh + +# Display system info +echo "--- Display system information" +set -x +system_profiler SPSoftwareDataType +sysctl -n machdep.cpu.brand_string +uname -m +set +x + +# Build XGBoost4J binary +echo "--- Build libxgboost4j.dylib" +set -x +mkdir build +pushd build +export JAVA_HOME=$(/usr/libexec/java_home) +cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 +ninja -v +popd +rm -rf build +set +x + +echo "--- Upload Python wheel" +set -x +pushd lib +mv -v libxgboost4j.dylib libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib +buildkite-agent artifact upload libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + aws s3 cp libxgboost4j_m1_${BUILDKITE_COMMIT}.dylib \ + s3://xgboost-nightly-builds/${BRANCH_NAME}/libxgboost4j/ \ + --acl public-read --no-progress +fi +popd +set +x + +# Ensure that XGBoost can be built with Clang 11 +echo "--- Build and Test XGBoost with MacOS M1, Clang 11" +set -x +LLVM11_PATH=$(brew --prefix llvm\@11) +mkdir build +pushd build +cmake .. -GNinja -DCMAKE_C_COMPILER=${LLVM11_PATH}/bin/clang \ + -DCMAKE_CXX_COMPILER=${LLVM11_PATH}/bin/clang++ -DGOOGLE_TEST=ON \ + -DUSE_DMLC_GTEST=ON +ninja -v diff --git a/tests/ci_build/build_jvm_packages.sh b/tests/ci_build/build_jvm_packages.sh index 5797a1f61964..bec8750f5bc0 100755 --- a/tests/ci_build/build_jvm_packages.sh +++ b/tests/ci_build/build_jvm_packages.sh @@ -27,6 +27,9 @@ fi mvn_profile_string="" if [ "x$use_scala213" != "x" ]; then export mvn_profile_string="-Pdefault,scala-2.13" + cd .. + python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts + cd jvm-packages fi mvn --no-transfer-progress package $mvn_profile_string -Dspark.version=${spark_version} $gpu_options diff --git a/tests/ci_build/conda_env/macos_cpu_test.yml b/tests/ci_build/conda_env/macos_cpu_test.yml index dfc1ee6005f3..ce9ca4b1ba7c 100644 --- a/tests/ci_build/conda_env/macos_cpu_test.yml +++ b/tests/ci_build/conda_env/macos_cpu_test.yml @@ -32,11 +32,10 @@ dependencies: - jsonschema - boto3 - awscli -- py-ubjson - cffi - pyarrow - pyspark>=3.4.0 - cloudpickle - pip: - sphinx_rtd_theme - - datatable + - py-ubjson diff --git a/tests/ci_build/deploy_jvm_packages.sh b/tests/ci_build/deploy_jvm_packages.sh index 5f448ee2aed0..265e864fd0fd 100755 --- a/tests/ci_build/deploy_jvm_packages.sh +++ b/tests/ci_build/deploy_jvm_packages.sh @@ -27,6 +27,9 @@ rm -rf ../build/ # Deploy to S3 bucket xgboost-maven-repo mvn --no-transfer-progress package deploy -P default,gpu,release-to-s3 -Dspark.version=${spark_version} -DskipTests # Deploy scala 2.13 to S3 bucket xgboost-maven-repo +cd .. +python dev/change_scala_version.py --scala-version 2.13 --purge-artifacts +cd jvm-packages/ mvn --no-transfer-progress package deploy -P release-to-s3,default,scala-2.13 -Dspark.version=${spark_version} -DskipTests diff --git a/tests/ci_build/test_jvm_cross.sh b/tests/ci_build/test_jvm_cross.sh index 18265cf015d3..1eef747818b0 100755 --- a/tests/ci_build/test_jvm_cross.sh +++ b/tests/ci_build/test_jvm_cross.sh @@ -21,9 +21,18 @@ if [ ! 
-z "$RUN_INTEGRATION_TEST" ]; then fi # including maven profiles for different scala versions: 2.12 is the default at the moment. -for _maven_profile_string in "" "-Pdefault,scala-2.13"; do +for scala_binary_version in "2.12" "2.13"; do + cd .. + python dev/change_scala_version.py --scala-version ${scala_binary_version} + cd jvm-packages scala_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.version -q -DforceStdout) - scala_binary_version=$(mvn help:evaluate $_maven_profile_string -Dexpression=scala.binary.version -q -DforceStdout) + if [[ "$scala_binary_version" == "2.12" ]]; then + _maven_profile_string="" + elif [[ "$scala_binary_version" == "2.13" ]]; then + _maven_profile_string="-Pdefault,scala-2.13" + else + echo "Unexpected scala version: $scala_version ($scala_binary_version)." + fi # Install XGBoost4J JAR into local Maven repository mvn --no-transfer-progress install:install-file -Dfile=./xgboost4j/target/xgboost4j_${scala_binary_version}-${xgboost4j_version}.jar -DgroupId=ml.dmlc -DartifactId=xgboost4j_${scala_binary_version} -Dversion=${xgboost4j_version} -Dpackaging=jar diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 481c4e5503af..9d20fdfd8e1c 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -940,6 +940,7 @@ def save_load_model(model_path): predt_0 = clf.predict(X) clf.save_model(model_path) clf.load_model(model_path) + assert clf.booster == "gblinear" predt_1 = clf.predict(X) np.testing.assert_allclose(predt_0, predt_1) assert clf.best_iteration == best_iteration @@ -955,25 +956,26 @@ def save_load_model(model_path): def test_save_load_model(): with tempfile.TemporaryDirectory() as tempdir: - model_path = os.path.join(tempdir, 'digits.model') + model_path = os.path.join(tempdir, "digits.model") save_load_model(model_path) with tempfile.TemporaryDirectory() as tempdir: - model_path = os.path.join(tempdir, 'digits.model.json') + model_path = os.path.join(tempdir, "digits.model.json") save_load_model(model_path) from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split with tempfile.TemporaryDirectory() as tempdir: - model_path = os.path.join(tempdir, 'digits.model.ubj') + model_path = os.path.join(tempdir, "digits.model.ubj") digits = load_digits(n_class=2) - y = digits['target'] - X = digits['data'] - booster = xgb.train({'tree_method': 'hist', - 'objective': 'binary:logistic'}, - dtrain=xgb.DMatrix(X, y), - num_boost_round=4) + y = digits["target"] + X = digits["data"] + booster = xgb.train( + {"tree_method": "hist", "objective": "binary:logistic"}, + dtrain=xgb.DMatrix(X, y), + num_boost_round=4, + ) predt_0 = booster.predict(xgb.DMatrix(X)) booster.save_model(model_path) cls = xgb.XGBClassifier() @@ -1007,6 +1009,8 @@ def test_save_load_model(): clf = xgb.XGBClassifier() clf.load_model(model_path) assert clf.classes_.size == 10 + assert clf.objective == "multi:softprob" + np.testing.assert_equal(clf.classes_, np.arange(10)) assert clf.n_classes_ == 10 diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index 664c0b89cf1a..efd10405cad0 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -1932,6 +1932,7 @@ def test_sklearn_io(self, client: "Client") -> None: cls.client = client cls.fit(X, y) predt_0 = cls.predict(X) + proba_0 = cls.predict_proba(X) with tempfile.TemporaryDirectory() 
as tmpdir: path = os.path.join(tmpdir, "model.pkl") @@ -1941,7 +1942,9 @@ def test_sklearn_io(self, client: "Client") -> None: with open(path, "rb") as fd: cls = pickle.load(fd) predt_1 = cls.predict(X) + proba_1 = cls.predict_proba(X) np.testing.assert_allclose(predt_0.compute(), predt_1.compute()) + np.testing.assert_allclose(proba_0.compute(), proba_1.compute()) path = os.path.join(tmpdir, "cls.json") cls.save_model(path) @@ -1950,16 +1953,20 @@ def test_sklearn_io(self, client: "Client") -> None: cls.load_model(path) assert cls.n_classes_ == 10 predt_2 = cls.predict(X) + proba_2 = cls.predict_proba(X) np.testing.assert_allclose(predt_0.compute(), predt_2.compute()) + np.testing.assert_allclose(proba_0.compute(), proba_2.compute()) # Use single node to load cls = xgb.XGBClassifier() cls.load_model(path) assert cls.n_classes_ == 10 predt_3 = cls.predict(X_) + proba_3 = cls.predict_proba(X_) np.testing.assert_allclose(predt_0.compute(), predt_3) + np.testing.assert_allclose(proba_0.compute(), proba_3) def test_dask_unsupported_features(client: "Client") -> None:
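
The sklearn.py and dask.py hunks above replace the removed XGBClassifierMixIn with a shared XGBModel._load_model_attributes helper, so load_model() now repopulates objective, booster, base_score and, for classifiers, n_classes_ from the saved booster configuration. The following is a minimal sketch of that round-trip, assuming xgboost >= 2.0.3 with scikit-learn installed; the synthetic data and file name are illustrative only, not taken from the patch.

# Illustrative only: attributes are restored from the booster config on load_model().
import os
import tempfile

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(80, 4))
y = np.arange(80) % 4  # four synthetic classes

clf = xgb.XGBClassifier(n_estimators=4, tree_method="hist")
clf.fit(X, y)

with tempfile.TemporaryDirectory() as tmpdir:
    path = os.path.join(tmpdir, "clf.json")
    clf.save_model(path)

    loaded = xgb.XGBClassifier()
    loaded.load_model(path)
    assert loaded.n_classes_ == 4
    assert loaded.objective == "multi:softprob"
    assert loaded.booster == "gbtree"  # gradient_booster name taken from the config
    np.testing.assert_equal(loaded.classes_, np.arange(4))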
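
The single-line core.py hunk adds a break so that _load_lib stops at the first candidate shared library that loads successfully, instead of continuing through the remaining paths. Below is a small sketch of that loop shape; it is not the library's actual loader, and the candidate paths are hypothetical.

# Sketch of the first-success loading loop; the paths below are hypothetical.
import ctypes
from typing import List, Optional, Tuple


def load_first(lib_paths: List[str]) -> Tuple[Optional[ctypes.CDLL], List[str]]:
    """Try candidate libraries in order and stop at the first successful load."""
    errors: List[str] = []
    lib: Optional[ctypes.CDLL] = None
    for lib_path in lib_paths:
        try:
            lib = ctypes.cdll.LoadLibrary(lib_path)
            break  # the added break: later candidates are no longer attempted
        except OSError as exc:
            errors.append(str(exc))
    return lib, errors


handle, load_errors = load_first(["./lib/libxgboost.so", "/usr/local/lib/libxgboost.so"])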