-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[release] LightGBM release tests (#17043)
- Loading branch information
Showing
26 changed files
with
1,199 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# -------------------------------------------------------------------- | ||
# Tests from the python/ray/util/lightgbm directory. | ||
# Please keep these sorted alphabetically. | ||
# -------------------------------------------------------------------- | ||
py_test(
    name = "simple_example",
    size = "small",
    srcs = ["simple_example.py"],
    tags = ["exclusive"],
    deps = [":lgbm_lib"],
)

py_test(
    name = "simple_tune",
    size = "small",
    srcs = ["simple_tune.py"],
    tags = ["exclusive"],
    deps = [":lgbm_lib"],
)

py_test(
    name = "test_client",
    size = "small",
    srcs = ["tests/test_client.py"],
    tags = [
        "client",
        "exclusive",
    ],
    deps = [":lgbm_lib"],
)

# This is a dummy test dependency that causes the above tests to be
# re-run if any of these files changes.
py_library(
    name = "lgbm_lib",
    srcs = glob(["**/*.py"]),
)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Re-exports the public ``lightgbm_ray`` entry points under this package.
# ``lightgbm_ray`` is treated as optional: when it cannot be imported the
# exported names stay ``None`` and a hint is logged instead of raising.
import logging

logger = logging.getLogger(__name__)

# Placeholders so that every name listed in ``__all__`` exists even when
# the import below fails.
train = None
predict = None
RayParams = None
RayDMatrix = None
RayFileType = None

try:
    from lightgbm_ray import train, predict, RayParams, RayDMatrix, RayFileType
except ImportError:
    # The failure is logged rather than re-raised, so importing this
    # package still succeeds without lightgbm_ray installed.
    logger.info(
        "lightgbm_ray is not installed. Please run "
        "`pip install git+https://github.com/ray-project/lightgbm_ray`.")

__all__ = ["train", "predict", "RayParams", "RayDMatrix", "RayFileType"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
import glob | ||
import os | ||
import time | ||
|
||
import ray | ||
|
||
from lightgbm_ray import train, RayDMatrix, RayFileType, \ | ||
RayParams, RayDeviceQuantileDMatrix | ||
from lightgbm_ray.tune import _TuneLGBMRank0Mixin | ||
from lightgbm.callback import CallbackEnv | ||
|
||
# Drop any inherited OMP_NUM_THREADS setting from this process's environment.
os.environ.pop("OMP_NUM_THREADS", None)
|
||
|
||
@ray.remote
class FailureState:
    """Remote bookkeeping object recording which ids were marked as failed."""

    def __init__(self):
        self._failed_ids = set()

    def set_failed(self, id):
        """Mark ``id`` as failed.

        Returns:
            True if this call marked the id, False if it was already marked.
        """
        newly_marked = id not in self._failed_ids
        if newly_marked:
            self._failed_ids.add(id)
        return newly_marked

    def has_failed(self, id):
        """Return whether ``id`` has been marked as failed."""
        return id in self._failed_ids
|
||
|
||
class FailureInjection(_TuneLGBMRank0Mixin):
    """Callback that kills its own process once at a given iteration.

    The shared ``state`` handle is consulted so that, for a given ``id``,
    only one caller goes on to kill itself.
    """

    # NOTE(review): presumably read by LightGBM to sort callbacks - confirm.
    order = 2

    def __init__(self, id, state, ranks, iteration):
        self._id = id
        self._state = state
        self._ranks = ranks or []
        self._iteration = iteration

    def __call__(self, env: CallbackEnv):
        if env.iteration != self._iteration:
            return

        # Only rank 0 is distinguishable here; every other rank maps to 1.
        rank = 0 if self.is_rank_0 else 1
        if rank not in self._ranks:
            return

        already_failed = ray.get(self._state.has_failed.remote(self._id))
        if already_failed:
            return

        claimed = ray.get(self._state.set_failed.remote(self._id))
        if not claimed:
            # Another rank is already about to fail
            return

        pid = os.getpid()
        print(f"Killing process: {pid} for actor rank {rank}")
        time.sleep(1)
        os.kill(pid, 9)
|
||
|
||
class TrackingCallback(_TuneLGBMRank0Mixin):
    """Callback that prints the current iteration, on rank 0 only."""

    # NOTE(review): presumably read by LightGBM to sort callbacks - confirm.
    order = 1

    def __call__(self, env: CallbackEnv):
        if not self.is_rank_0:
            return
        print(f"[Rank 0] I am at iteration {env.iteration}")
|
||
|
||
def train_ray(path,
              num_workers,
              num_boost_rounds,
              num_files=0,
              regression=False,
              use_gpu=False,
              ray_params=None,
              lightgbm_params=None,
              **kwargs):
    """Train a LightGBM model on parquet data and report timing.

    Args:
        path: Location of the training data; ``~`` is expanded.
        num_workers: Number of actors used for the data matrix and, unless
            ``ray_params`` is given, for training.
        num_boost_rounds: Passed to ``train`` as ``num_boost_round``.
        num_files: If non-zero, train on exactly this many parquet files
            found below ``path``. The sorted file list is repeated when
            fewer files exist than requested.
        regression: Use the regression objective/metrics instead of the
            binary classification ones.
        use_gpu: Set ``device`` to ``"gpu"`` and request one GPU per actor
            in the default ``RayParams``. The device quantile matrix is
            only used when ``cupy`` can be imported.
        ray_params: Optional replacement for the default ``RayParams``.
        lightgbm_params: Optional dict merged over the default config.
        **kwargs: Forwarded to ``train``.

    Returns:
        Tuple ``(bst, additional_results, taken)`` where ``taken`` is the
        wall-clock training time in seconds.

    Raises:
        ValueError: If ``path`` does not exist, or if ``num_files`` is set
            and no parquet file matches below ``path``.
    """
    path = os.path.expanduser(path)
    if not os.path.exists(path):
        raise ValueError(f"Path does not exist: {path}")

    if num_files:
        # NOTE: without ``recursive=True`` the ``**`` component behaves like
        # ``*``, so this matches files exactly one directory below ``path``.
        pattern = f"{path}/**/*.parquet"
        files = sorted(glob.glob(pattern))
        if not files:
            # Doubling an empty list never grows it; fail fast instead of
            # spinning forever in the loop below.
            raise ValueError(f"No parquet files found matching: {pattern}")
        while num_files > len(files):
            files = files + files
        path = files[0:num_files]

    use_device_matrix = False
    if use_gpu:
        try:
            import cupy  # noqa: F401
            use_device_matrix = True
        except ImportError:
            use_device_matrix = False

    # Both matrix types are built with the same arguments.
    matrix_cls = RayDeviceQuantileDMatrix if use_device_matrix \
        else RayDMatrix
    dtrain = matrix_cls(
        path,
        num_actors=num_workers,
        label="labels",
        ignore=["partition"],
        filetype=RayFileType.PARQUET)

    config = {"device": "cpu" if not use_gpu else "gpu"}

    if not regression:
        # Classification
        config.update({
            "objective": "binary",
            "metric": ["binary_logloss", "binary_error"],
        })
    else:
        # Regression
        config.update({
            "objective": "regression",
            "metric": ["l2", "rmse"],
        })

    # Caller-supplied parameters take precedence over the defaults above.
    if lightgbm_params:
        config.update(lightgbm_params)

    start = time.time()
    evals_result = {}
    additional_results = {}
    bst = train(
        config,
        dtrain,
        evals_result=evals_result,
        additional_results=additional_results,
        num_boost_round=num_boost_rounds,
        ray_params=ray_params or RayParams(
            max_actor_restarts=2,
            num_actors=num_workers,
            cpus_per_actor=2,
            gpus_per_actor=0 if not use_gpu else 1),
        evals=[(dtrain, "train")],
        **kwargs)
    taken = time.time() - start
    print(f"TRAIN TIME TAKEN: {taken:.2f} seconds")

    out_file = os.path.expanduser(
        "~/benchmark_{}.lgbm".format("cpu" if not use_gpu else "gpu"))
    bst.booster_.save_model(out_file)

    print("Final training error: {:.4f}".format(evals_result["train"][
        "binary_error" if not regression else "rmse"][-1]))
    return bst, additional_results, taken
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from sklearn import datasets | ||
from sklearn.model_selection import train_test_split | ||
|
||
from ray.util.lightgbm import RayDMatrix, RayParams, train | ||
|
||
|
||
# __lightgbm_begin__ | ||
def main():
    """Train a binary LightGBM classifier on the breast cancer dataset."""
    # Load the dataset and hold out a quarter of it for evaluation
    features, targets = datasets.load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25)

    dtrain = RayDMatrix(x_train, y_train)
    deval = RayDMatrix(x_test, y_test)

    # Training configuration
    params = {
        "objective": "binary",
        "metric": ["binary_logloss", "binary_error"],
        "max_depth": 3,
    }
    ray_params = RayParams(max_actor_restarts=1, num_actors=1)

    # Filled in by ``train`` with the per-iteration evaluation metrics
    history = {}

    # Fit the classifier
    model = train(
        params,
        dtrain,
        evals=[(deval, "eval")],
        evals_result=history,
        ray_params=ray_params,
        verbose_eval=False)

    model.booster_.save_model("simple.lgbm")
    final_error = history["eval"]["binary_error"][-1]
    print("Final validation error: {:.4f}".format(final_error))
|
||
|
||
# __lightgbm_end__ | ||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from sklearn import datasets | ||
from sklearn.model_selection import train_test_split | ||
|
||
from ray.util.lightgbm import RayDMatrix, RayParams, train | ||
|
||
# __train_begin__ | ||
num_cpus_per_actor = 2 | ||
num_actors = 1 | ||
|
||
|
||
def train_model(config):
    """Train one LightGBM model with the given ``config`` and save it.

    The fitted booster is written to ``model.lgbm`` in the current working
    directory.
    """
    # Load the dataset and hold out a quarter of it for evaluation
    features, targets = datasets.load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25)

    dtrain = RayDMatrix(x_train, y_train)
    deval = RayDMatrix(x_test, y_test)

    actor_setup = RayParams(
        num_actors=num_actors, cpus_per_actor=num_cpus_per_actor)

    history = {}
    model = train(
        params=config,
        dtrain=dtrain,
        evals=[(deval, "eval")],
        evals_result=history,
        verbose_eval=False,
        ray_params=actor_setup)
    model.booster_.save_model("model.lgbm")
|
||
|
||
# __train_end__ | ||
|
||
|
||
# __load_begin__ | ||
def load_best_model(best_logdir):
    """Load the ``model.lgbm`` booster stored in ``best_logdir``."""
    # Imports stay inside the function body, as in the original.
    import lightgbm as lgbm
    import os

    model_path = os.path.join(best_logdir, "model.lgbm")
    return lgbm.Booster(model_file=model_path)
|
||
|
||
# __load_end__ | ||
|
||
|
||
def main():
    """Run a small Tune search over LightGBM parameters and report the best."""
    # __tune_begin__
    from ray import tune

    # Set config
    config = {
        "objective": "binary",
        "metric": ["binary_logloss", "binary_error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }
    # __tune_end__

    # __tune_run_begin__
    # One CPU for the trial itself plus the CPUs used by its training actors.
    trial_resources = {
        "cpu": 1,
        "extra_cpu": num_actors * num_cpus_per_actor
    }
    analysis = tune.run(
        train_model,
        config=config,
        metric="eval-binary_error",
        mode="min",
        num_samples=4,
        resources_per_trial=trial_resources)

    # Load in the best performing model.
    best_bst = load_best_model(analysis.best_logdir)

    # Use the following code block instead if using Ray Client.
    # import ray
    # if ray.util.client.ray.is_connected():
    #     # If using Ray Client best_logdir is a directory on the server.
    #     # So we want to make sure we wrap model loading in a task.
    #     remote_load_fn = ray.remote(load_best_model)
    #     best_bst = ray.get(remote_load_fn.remote(analysis.best_logdir))

    # Do something with the best model.
    _ = best_bst

    best_error = analysis.best_result["eval-binary_error"]
    accuracy = 1. - best_error
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {accuracy:.4f}")
    # __tune_run_end__
|
||
|
||
if __name__ == "__main__": | ||
main() |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import pytest | ||
import sys | ||
|
||
import ray | ||
from ray.util.client.ray_client_helpers import ray_start_client_server | ||
|
||
|
||
@pytest.fixture
def start_client_server():
    """Yield the client from ``ray_start_client_server`` for one test."""
    with ray_start_client_server() as ray_client:
        yield ray_client
|
||
|
||
def test_simple_example(start_client_server):
    """Run the simple LightGBM example while connected through Ray Client."""
    assert ray.util.client.ray.is_connected()
    from ray.util.lightgbm.simple_example import main as run_example
    run_example()
|
||
|
||
def test_simple_tune(start_client_server):
    """Run the LightGBM Tune example while connected through Ray Client."""
    assert ray.util.client.ray.is_connected()
    from ray.util.lightgbm.simple_tune import main as run_tune_example
    run_tune_example()
|
||
|
||
# Allow running this test file directly; the process exit code is pytest's.
# ``pytest`` and ``sys`` are already imported at the top of this file.
if __name__ == "__main__":
    sys.exit(pytest.main(["-v", __file__]))
Oops, something went wrong.