[release] LightGBM release tests (#17043)

ray-project · Jul 14, 2021 · cfc5806 · cfc5806
1 parent 0f79ebb
commit cfc5806
Show file tree

Hide file tree

Showing 26 changed files with 1,199 additions and 0 deletions.
diff --git a/python/ray/util/lightgbm/BUILD b/python/ray/util/lightgbm/BUILD
@@ -0,0 +1,36 @@
+# --------------------------------------------------------------------
+# Tests from the python/ray/util/lightgbm directory.
+# Please keep these sorted alphabetically.
+# --------------------------------------------------------------------
+# Runs simple_example.py as a small test target.
+py_test(
+    name = "simple_example",
+    size = "small",
+    srcs = ["simple_example.py"],
+    tags = ["exclusive"],
+    deps = [":lgbm_lib"],
+)
+
+# Runs simple_tune.py as a small test target.
+py_test(
+    name = "simple_tune",
+    size = "small",
+    srcs = ["simple_tune.py"],
+    tags = ["exclusive"],
+    deps = [":lgbm_lib"],
+)
+
+# Runs tests/test_client.py, which drives both examples through Ray Client.
+py_test(
+    name = "test_client",
+    size = "small",
+    srcs = ["tests/test_client.py"],
+    tags = [
+        "exclusive",
+        "client",
+    ],
+    deps = [":lgbm_lib"],
+)
+
+# This is a dummy test dependency that causes the above tests to be
+# re-run if any of these files changes.
+py_library(
+    name = "lgbm_lib",
+    # Glob over the .py files of this package.
+    srcs = glob(["**/*.py"]),
+)
+
+
diff --git a/python/ray/util/lightgbm/__init__.py b/python/ray/util/lightgbm/__init__.py
@@ -0,0 +1,18 @@
+import logging
+
+logger = logging.getLogger(__name__)
+
+train = None
+predict = None
+RayParams = None
+RayDMatrix = None
+RayFileType = None
+
+try:
+    from lightgbm_ray import train, predict, RayParams, RayDMatrix, RayFileType
+except ImportError:
+    logger.info(
+        "lightgbm_ray is not installed. Please run "
+        "`pip install git+https://github.com/ray-project/lightgbm_ray`.")
+
+__all__ = ["train", "predict", "RayParams", "RayDMatrix", "RayFileType"]
diff --git a/python/ray/util/lightgbm/release_test_util.py b/python/ray/util/lightgbm/release_test_util.py
@@ -0,0 +1,149 @@
+import glob
+import os
+import time
+
+import ray
+
+from lightgbm_ray import train, RayDMatrix, RayFileType, \
+    RayParams, RayDeviceQuantileDMatrix
+from lightgbm_ray.tune import _TuneLGBMRank0Mixin
+from lightgbm.callback import CallbackEnv
+
+if "OMP_NUM_THREADS" in os.environ:
+    del os.environ["OMP_NUM_THREADS"]
+
+
+@ray.remote
+class FailureState:
+    def __init__(self):
+        self._failed_ids = set()
+
+    def set_failed(self, id):
+        if id in self._failed_ids:
+            return False
+        self._failed_ids.add(id)
+        return True
+
+    def has_failed(self, id):
+        return id in self._failed_ids
+
+
+class FailureInjection(_TuneLGBMRank0Mixin):
+    def __init__(self, id, state, ranks, iteration):
+        self._id = id
+        self._state = state
+        self._ranks = ranks or []
+        self._iteration = iteration
+
+    def __call__(self, env: CallbackEnv):
+        if env.iteration == self._iteration:
+            rank = 0 if self.is_rank_0 else 1
+            if rank in self._ranks:
+                if not ray.get(self._state.has_failed.remote(self._id)):
+                    success = ray.get(self._state.set_failed.remote(self._id))
+                    if not success:
+                        # Another rank is already about to fail
+                        return
+
+                    pid = os.getpid()
+                    print(f"Killing process: {pid} for actor rank {rank}")
+                    time.sleep(1)
+                    os.kill(pid, 9)
+
+    order = 2
+
+
+class TrackingCallback(_TuneLGBMRank0Mixin):
+    def __call__(self, env: CallbackEnv):
+        if self.is_rank_0:
+            print(f"[Rank 0] I am at iteration {env.iteration}")
+
+    order = 1
+
+
+def train_ray(path,
+              num_workers,
+              num_boost_rounds,
+              num_files=0,
+              regression=False,
+              use_gpu=False,
+              ray_params=None,
+              lightgbm_params=None,
+              **kwargs):
+    path = os.path.expanduser(path)
+    if not os.path.exists(path):
+        raise ValueError(f"Path does not exist: {path}")
+
+    if num_files:
+        files = sorted(glob.glob(f"{path}/**/*.parquet"))
+        while num_files > len(files):
+            files = files + files
+        path = files[0:num_files]
+
+    use_device_matrix = False
+    if use_gpu:
+        try:
+            import cupy  # noqa: F401
+            use_device_matrix = True
+        except ImportError:
+            use_device_matrix = False
+
+    if use_device_matrix:
+        dtrain = RayDeviceQuantileDMatrix(
+            path,
+            num_actors=num_workers,
+            label="labels",
+            ignore=["partition"],
+            filetype=RayFileType.PARQUET)
+    else:
+        dtrain = RayDMatrix(
+            path,
+            num_actors=num_workers,
+            label="labels",
+            ignore=["partition"],
+            filetype=RayFileType.PARQUET)
+
+    config = {"device": "cpu" if not use_gpu else "gpu"}
+
+    if not regression:
+        # Classification
+        config.update({
+            "objective": "binary",
+            "metric": ["binary_logloss", "binary_error"],
+        })
+    else:
+        # Regression
+        config.update({
+            "objective": "regression",
+            "metric": ["l2", "rmse"],
+        })
+
+    if lightgbm_params:
+        config.update(lightgbm_params)
+
+    start = time.time()
+    evals_result = {}
+    additional_results = {}
+    bst = train(
+        config,
+        dtrain,
+        evals_result=evals_result,
+        additional_results=additional_results,
+        num_boost_round=num_boost_rounds,
+        ray_params=ray_params or RayParams(
+            max_actor_restarts=2,
+            num_actors=num_workers,
+            cpus_per_actor=2,
+            gpus_per_actor=0 if not use_gpu else 1),
+        evals=[(dtrain, "train")],
+        **kwargs)
+    taken = time.time() - start
+    print(f"TRAIN TIME TAKEN: {taken:.2f} seconds")
+
+    out_file = os.path.expanduser(
+        "~/benchmark_{}.lgbm".format("cpu" if not use_gpu else "gpu"))
+    bst.booster_.save_model(out_file)
+
+    print("Final training error: {:.4f}".format(evals_result["train"][
+        "binary_error" if not regression else "rmse"][-1]))
+    return bst, additional_results, taken
diff --git a/python/ray/util/lightgbm/simple_example.py b/python/ray/util/lightgbm/simple_example.py
@@ -0,0 +1,44 @@
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+
+from ray.util.lightgbm import RayDMatrix, RayParams, train
+
+
+# __lightgbm_begin__
+def main():
+    # Load dataset
+    data, labels = datasets.load_breast_cancer(return_X_y=True)
+    # Split into train and test set
+    train_x, test_x, train_y, test_y = train_test_split(
+        data, labels, test_size=0.25)
+
+    train_set = RayDMatrix(train_x, train_y)
+    test_set = RayDMatrix(test_x, test_y)
+
+    # Set config
+    config = {
+        "objective": "binary",
+        "metric": ["binary_logloss", "binary_error"],
+        "max_depth": 3,
+    }
+
+    evals_result = {}
+
+    # Train the classifier
+    bst = train(
+        config,
+        train_set,
+        evals=[(test_set, "eval")],
+        evals_result=evals_result,
+        ray_params=RayParams(max_actor_restarts=1, num_actors=1),
+        verbose_eval=False)
+
+    bst.booster_.save_model("simple.lgbm")
+    print("Final validation error: {:.4f}".format(
+        evals_result["eval"]["binary_error"][-1]))
+
+
+# __lightgbm_end__
+
+# Only run the example when this file is executed directly.
+if __name__ == "__main__":
+    main()
diff --git a/python/ray/util/lightgbm/simple_tune.py b/python/ray/util/lightgbm/simple_tune.py
@@ -0,0 +1,95 @@
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+
+from ray.util.lightgbm import RayDMatrix, RayParams, train
+
+# __train_begin__
+# Passed to RayParams in train_model() and multiplied together in main() to
+# compute the "extra_cpu" request of each trial.
+num_cpus_per_actor = 2
+num_actors = 1
+
+
+def train_model(config):
+    # Load dataset
+    data, labels = datasets.load_breast_cancer(return_X_y=True)
+    # Split into train and test set
+    train_x, test_x, train_y, test_y = train_test_split(
+        data, labels, test_size=0.25)
+
+    train_set = RayDMatrix(train_x, train_y)
+    test_set = RayDMatrix(test_x, test_y)
+
+    evals_result = {}
+    bst = train(
+        params=config,
+        dtrain=train_set,
+        evals=[(test_set, "eval")],
+        evals_result=evals_result,
+        verbose_eval=False,
+        ray_params=RayParams(
+            num_actors=num_actors, cpus_per_actor=num_cpus_per_actor))
+    bst.booster_.save_model("model.lgbm")
+
+
+# __train_end__
+
+
+# __load_begin__
+def load_best_model(best_logdir):
+    import lightgbm as lgbm
+    import os
+
+    best_bst = lgbm.Booster(model_file=os.path.join(best_logdir, "model.lgbm"))
+    return best_bst
+
+
+# __load_end__
+
+
+def main():
+    """Run a small Tune search over ``train_model`` and load the best model.
+
+    Prints the best configuration and its accuracy, computed as one minus
+    the best result's "eval-binary_error".
+    """
+    # __tune_begin__
+    from ray import tune
+
+    # Set config
+    config = {
+        "objective": "binary",
+        "metric": ["binary_logloss", "binary_error"],
+        "eta": tune.loguniform(1e-4, 1e-1),
+        "subsample": tune.uniform(0.5, 1.0),
+        "max_depth": tune.randint(1, 9)
+    }
+    # __tune_end__
+
+    # __tune_run_begin__
+    # NOTE(review): "eval-binary_error" is presumably reported to Tune by
+    # the training integration as "<eval name>-<metric>" - confirm.
+    analysis = tune.run(
+        train_model,
+        config=config,
+        metric="eval-binary_error",
+        mode="min",
+        num_samples=4,
+        resources_per_trial={
+            "cpu": 1,
+            # One entry per CPU requested by the training actors.
+            "extra_cpu": num_actors * num_cpus_per_actor
+        })
+
+    # Load in the best performing model.
+    best_bst = load_best_model(analysis.best_logdir)
+
+    # Use the following code block instead if using Ray Client.
+    # import ray
+    # if ray.util.client.ray.is_connected():
+    #     # If using Ray Client best_logdir is a directory on the server.
+    #     # So we want to make sure we wrap model loading in a task.
+    #     remote_load_fn = ray.remote(load_best_model)
+    #     best_bst = ray.get(remote_load_fn.remote(analysis.best_logdir))
+
+    # Do something with the best model.
+    _ = best_bst
+
+    accuracy = 1. - analysis.best_result["eval-binary_error"]
+    print(f"Best model parameters: {analysis.best_config}")
+    print(f"Best model total accuracy: {accuracy:.4f}")
+    # __tune_run_end__
+
+
+# Only run the example when this file is executed directly.
+if __name__ == "__main__":
+    main()
diff --git a/python/ray/util/lightgbm/tests/__init__.py b/python/ray/util/lightgbm/tests/__init__.py
diff --git a/python/ray/util/lightgbm/tests/test_client.py b/python/ray/util/lightgbm/tests/test_client.py
@@ -0,0 +1,28 @@
+import pytest
+import sys
+
+import ray
+from ray.util.client.ray_client_helpers import ray_start_client_server
+
+
+@pytest.fixture
+def start_client_server():
+    with ray_start_client_server() as client:
+        yield client
+
+
+def test_simple_example(start_client_server):
+    assert ray.util.client.ray.is_connected()
+    from ray.util.lightgbm.simple_example import main
+    main()
+
+
+def test_simple_tune(start_client_server):
+    assert ray.util.client.ray.is_connected()
+    from ray.util.lightgbm.simple_tune import main
+    main()
+
+
+if __name__ == "__main__":
+    import pytest
+    sys.exit(pytest.main(["-v", __file__]))