Skip to content

Commit

Permalink
[release] LightGBM release tests (#17043)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yard1 authored Jul 14, 2021
1 parent 0f79ebb commit cfc5806
Show file tree
Hide file tree
Showing 26 changed files with 1,199 additions and 0 deletions.
36 changes: 36 additions & 0 deletions python/ray/util/lightgbm/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# --------------------------------------------------------------------
# Tests from the python/ray/util/lightgbm directory.
# Please keep these sorted alphabetically.
# --------------------------------------------------------------------
py_test(
    name = "simple_example",
    size = "small",
    srcs = ["simple_example.py"],
    tags = ["exclusive"],
    deps = [":lgbm_lib"],
)

py_test(
    name = "simple_tune",
    size = "small",
    srcs = ["simple_tune.py"],
    tags = ["exclusive"],
    deps = [":lgbm_lib"],
)

py_test(
    name = "test_client",
    size = "small",
    srcs = ["tests/test_client.py"],
    tags = [
        "client",
        "exclusive",
    ],
    deps = [":lgbm_lib"],
)

# Dummy dependency shared by every test above: because it globs all Python
# sources in this package, editing any of them causes the tests to be re-run.
py_library(
    name = "lgbm_lib",
    srcs = glob(["**/*.py"]),
)


18 changes: 18 additions & 0 deletions python/ray/util/lightgbm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import logging

logger = logging.getLogger(__name__)

# Pre-bind every public name to None so that importing this package never
# fails; the names are rebound below when lightgbm_ray can be imported.
train = predict = RayParams = RayDMatrix = RayFileType = None

try:
    from lightgbm_ray import (
        train,
        predict,
        RayParams,
        RayDMatrix,
        RayFileType,
    )
except ImportError:
    # Optional dependency: only log a hint, keep the None placeholders.
    logger.info("lightgbm_ray is not installed. Please run "
                "`pip install git+https://github.com/ray-project/"
                "lightgbm_ray`.")

__all__ = ["train", "predict", "RayParams", "RayDMatrix", "RayFileType"]
149 changes: 149 additions & 0 deletions python/ray/util/lightgbm/release_test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import glob
import os
import time

import ray

from lightgbm_ray import train, RayDMatrix, RayFileType, \
RayParams, RayDeviceQuantileDMatrix
from lightgbm_ray.tune import _TuneLGBMRank0Mixin
from lightgbm.callback import CallbackEnv

# Drop any inherited OMP_NUM_THREADS setting; a missing key is not an error.
os.environ.pop("OMP_NUM_THREADS", None)


@ray.remote
class FailureState:
    """Ray actor that remembers which failure ids have already been used."""

    def __init__(self):
        self._failed_ids = set()

    def set_failed(self, id):
        """Record ``id`` as failed.

        Returns:
            True if ``id`` was newly recorded, False if it was already
            present.
        """
        newly_recorded = id not in self._failed_ids
        # Adding an id that is already in the set is a no-op.
        self._failed_ids.add(id)
        return newly_recorded

    def has_failed(self, id):
        """Return whether ``id`` has been recorded as failed."""
        return id in self._failed_ids


class FailureInjection(_TuneLGBMRank0Mixin):
    """Callback that kills its own process once at a chosen iteration.

    Args:
        id: Identifier of this injected failure, used as the key in
            ``state``.
        state: Handle whose ``has_failed`` / ``set_failed`` methods are
            invoked through ``.remote()`` to coordinate which process fails.
        ranks: Ranks allowed to fail; ``None`` is treated as an empty list.
        iteration: Iteration number at which the failure is triggered.
    """

    # NOTE(review): presumably consumed by LightGBM to order callbacks;
    # confirm against the LightGBM callback documentation.
    order = 2

    def __init__(self, id, state, ranks, iteration):
        self._id = id
        self._state = state
        self._ranks = ranks or []
        self._iteration = iteration

    def __call__(self, env: CallbackEnv):
        if env.iteration != self._iteration:
            return

        # Only two ranks are distinguished here: 0 for rank 0, 1 otherwise.
        rank = 0 if self.is_rank_0 else 1
        if rank not in self._ranks:
            return

        if ray.get(self._state.has_failed.remote(self._id)):
            return

        claimed = ray.get(self._state.set_failed.remote(self._id))
        if not claimed:
            # Another rank is already about to fail
            return

        pid = os.getpid()
        print(f"Killing process: {pid} for actor rank {rank}")
        time.sleep(1)
        os.kill(pid, 9)


class TrackingCallback(_TuneLGBMRank0Mixin):
    """Callback that prints the current iteration when ``is_rank_0`` is set."""

    # NOTE(review): presumably consumed by LightGBM to order callbacks;
    # confirm against the LightGBM callback documentation.
    order = 1

    def __call__(self, env: CallbackEnv):
        if not self.is_rank_0:
            return
        print(f"[Rank 0] I am at iteration {env.iteration}")


def _select_parquet_files(path, num_files):
    """Return a list of parquet files found below ``path``.

    The sorted file list is doubled until it holds at least ``num_files``
    entries and is then cut to ``files[0:num_files]``.

    Raises:
        ValueError: If no parquet file matches. Without this check the
            doubling loop below would never terminate, because an empty
            list stays empty when doubled.
    """
    # NOTE: glob is called without recursive=True, so "**" behaves like "*"
    # and only files exactly one directory below ``path`` are matched.
    files = sorted(glob.glob(f"{path}/**/*.parquet"))
    if not files:
        raise ValueError(f"No parquet files found under: {path}")
    while num_files > len(files):
        files = files + files
    return files[0:num_files]


def train_ray(path,
              num_workers,
              num_boost_rounds,
              num_files=0,
              regression=False,
              use_gpu=False,
              ray_params=None,
              lightgbm_params=None,
              **kwargs):
    """Train a model with ``lightgbm_ray.train`` on parquet data and time it.

    Args:
        path: Data location; ``~`` is expanded. Must exist.
        num_workers: Passed as ``num_actors`` to the data matrix and to the
            default ``RayParams``.
        num_boost_rounds: Passed to ``train`` as ``num_boost_round``.
        num_files: If truthy, train on this many parquet files found below
            ``path`` (files are repeated when fewer exist) instead of on
            ``path`` itself.
        regression: Use the regression objective and metrics instead of the
            binary classification ones.
        use_gpu: Set ``device`` to ``"gpu"``, request one GPU per actor in
            the default ``RayParams`` and, if ``cupy`` is importable, load
            the data with ``RayDeviceQuantileDMatrix``.
        ray_params: Optional ``RayParams``; a default is built when falsy.
        lightgbm_params: Optional dict merged over the generated config.
        **kwargs: Forwarded to ``train``.

    Returns:
        Tuple ``(bst, additional_results, taken)`` where ``taken`` is the
        wall-clock training time in seconds.

    Raises:
        ValueError: If ``path`` does not exist, or if ``num_files`` is set
            and no parquet files are found.
    """
    path = os.path.expanduser(path)
    if not os.path.exists(path):
        raise ValueError(f"Path does not exist: {path}")

    if num_files:
        path = _select_parquet_files(path, num_files)

    # The device matrix is only used when cupy can actually be imported.
    use_device_matrix = False
    if use_gpu:
        try:
            import cupy  # noqa: F401
            use_device_matrix = True
        except ImportError:
            use_device_matrix = False

    matrix_cls = RayDeviceQuantileDMatrix if use_device_matrix else RayDMatrix
    dtrain = matrix_cls(
        path,
        num_actors=num_workers,
        label="labels",
        ignore=["partition"],
        filetype=RayFileType.PARQUET)

    device = "gpu" if use_gpu else "cpu"
    config = {"device": device}

    if regression:
        # Regression
        config.update({
            "objective": "regression",
            "metric": ["l2", "rmse"],
        })
    else:
        # Classification
        config.update({
            "objective": "binary",
            "metric": ["binary_logloss", "binary_error"],
        })

    # Caller-supplied parameters take precedence over the defaults above.
    if lightgbm_params:
        config.update(lightgbm_params)

    start = time.time()
    evals_result = {}
    additional_results = {}
    bst = train(
        config,
        dtrain,
        evals_result=evals_result,
        additional_results=additional_results,
        num_boost_round=num_boost_rounds,
        ray_params=ray_params or RayParams(
            max_actor_restarts=2,
            num_actors=num_workers,
            cpus_per_actor=2,
            gpus_per_actor=1 if use_gpu else 0),
        evals=[(dtrain, "train")],
        **kwargs)
    taken = time.time() - start
    print(f"TRAIN TIME TAKEN: {taken:.2f} seconds")

    out_file = os.path.expanduser("~/benchmark_{}.lgbm".format(device))
    bst.booster_.save_model(out_file)

    final_metric = "rmse" if regression else "binary_error"
    print("Final training error: {:.4f}".format(
        evals_result["train"][final_metric][-1]))
    return bst, additional_results, taken
44 changes: 44 additions & 0 deletions python/ray/util/lightgbm/simple_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split

from ray.util.lightgbm import RayDMatrix, RayParams, train


# __lightgbm_begin__
def main():
    """Train a binary classifier on the breast cancer dataset and save it."""
    # Load dataset
    features, targets = datasets.load_breast_cancer(return_X_y=True)
    # Split into train and test set
    train_x, test_x, train_y, test_y = train_test_split(
        features, targets, test_size=0.25)

    train_set = RayDMatrix(train_x, train_y)
    test_set = RayDMatrix(test_x, test_y)

    # Set config
    config = {
        "objective": "binary",
        "metric": ["binary_logloss", "binary_error"],
        "max_depth": 3,
    }

    # Filled in by train() with the per-iteration metrics of each eval set
    evals_result = {}
    ray_params = RayParams(max_actor_restarts=1, num_actors=1)

    # Train the classifier
    bst = train(
        config,
        train_set,
        evals=[(test_set, "eval")],
        evals_result=evals_result,
        ray_params=ray_params,
        verbose_eval=False)

    bst.booster_.save_model("simple.lgbm")
    final_error = evals_result["eval"]["binary_error"][-1]
    print(f"Final validation error: {final_error:.4f}")


# __lightgbm_end__

if __name__ == "__main__":
    main()
95 changes: 95 additions & 0 deletions python/ray/util/lightgbm/simple_tune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split

from ray.util.lightgbm import RayDMatrix, RayParams, train

# __train_begin__
# Resources of the distributed training run started by each Tune trial.
num_cpus_per_actor = 2
num_actors = 1


def train_model(config):
    """Train one model with the given parameters and save it to model.lgbm.

    Args:
        config: Parameter dict passed to ``train`` as ``params``.
    """
    # Load dataset
    features, targets = datasets.load_breast_cancer(return_X_y=True)
    # Split into train and test set
    train_x, test_x, train_y, test_y = train_test_split(
        features, targets, test_size=0.25)

    train_set = RayDMatrix(train_x, train_y)
    test_set = RayDMatrix(test_x, test_y)

    ray_params = RayParams(
        num_actors=num_actors, cpus_per_actor=num_cpus_per_actor)

    evals_result = {}
    bst = train(
        params=config,
        dtrain=train_set,
        evals=[(test_set, "eval")],
        evals_result=evals_result,
        verbose_eval=False,
        ray_params=ray_params)
    bst.booster_.save_model("model.lgbm")


# __train_end__


# __load_begin__
def load_best_model(best_logdir):
    """Load the booster stored as ``model.lgbm`` inside ``best_logdir``."""
    import lightgbm as lgbm
    import os

    model_path = os.path.join(best_logdir, "model.lgbm")
    return lgbm.Booster(model_file=model_path)


# __load_end__


def main():
    """Run a small Tune search over ``train_model`` and report the best trial."""
    # __tune_begin__
    from ray import tune

    # Set config
    config = {
        "objective": "binary",
        "metric": ["binary_logloss", "binary_error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }
    # __tune_end__

    # __tune_run_begin__
    # One CPU for the trial itself plus the CPUs of its training actors.
    trial_resources = {
        "cpu": 1,
        "extra_cpu": num_actors * num_cpus_per_actor
    }
    analysis = tune.run(
        train_model,
        config=config,
        metric="eval-binary_error",
        mode="min",
        num_samples=4,
        resources_per_trial=trial_resources)

    # Load in the best performing model.
    best_bst = load_best_model(analysis.best_logdir)

    # Use the following code block instead if using Ray Client.
    # import ray
    # if ray.util.client.ray.is_connected():
    #     # If using Ray Client best_logdir is a directory on the server.
    #     # So we want to make sure we wrap model loading in a task.
    #     remote_load_fn = ray.remote(load_best_model)
    #     best_bst = ray.get(remote_load_fn.remote(analysis.best_logdir))

    # Do something with the best model.
    _ = best_bst

    best_error = analysis.best_result["eval-binary_error"]
    accuracy = 1. - best_error
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")
    # __tune_run_end__


if __name__ == "__main__":
    main()
Empty file.
28 changes: 28 additions & 0 deletions python/ray/util/lightgbm/tests/test_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pytest
import sys

import ray
from ray.util.client.ray_client_helpers import ray_start_client_server


@pytest.fixture
def start_client_server():
    """Yield the client from ``ray_start_client_server`` for one test."""
    with ray_start_client_server() as connected_client:
        yield connected_client


def test_simple_example(start_client_server):
    """Run the simple LightGBM example while connected through Ray Client."""
    connected = ray.util.client.ray.is_connected()
    assert connected
    # Imported here so the example module is loaded after the client is up.
    from ray.util.lightgbm.simple_example import main as run_example
    run_example()


def test_simple_tune(start_client_server):
    """Run the LightGBM Tune example while connected through Ray Client."""
    connected = ray.util.client.ray.is_connected()
    assert connected
    # Imported here so the example module is loaded after the client is up.
    from ray.util.lightgbm.simple_tune import main as run_tune_example
    run_tune_example()


if __name__ == "__main__":
    # pytest and sys are both imported at the top of this module.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
Loading

0 comments on commit cfc5806

Please sign in to comment.