From 47505d464121f4c93ed656d14fe985d16b5956ab Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Thu, 14 Jul 2022 22:20:21 +0200 Subject: [PATCH] [AIR] Improve `to_air_checkpoint` with path (#26532) Signed-off-by: Stefan van der Kleij --- python/ray/train/lightgbm/utils.py | 22 ++++++++++++++---- python/ray/train/sklearn/utils.py | 20 +++++++++++++--- python/ray/train/tensorflow/utils.py | 4 ++-- .../train/tests/test_lightgbm_predictor.py | 2 +- .../ray/train/tests/test_sklearn_predictor.py | 2 +- .../ray/train/tests/test_xgboost_predictor.py | 2 +- python/ray/train/torch/utils.py | 4 ++-- python/ray/train/xgboost/utils.py | 23 +++++++++++++++---- 8 files changed, 61 insertions(+), 18 deletions(-) diff --git a/python/ray/train/lightgbm/utils.py b/python/ray/train/lightgbm/utils.py index 3f06ae00ad0f..4aa93d46ce0a 100644 --- a/python/ray/train/lightgbm/utils.py +++ b/python/ray/train/lightgbm/utils.py @@ -17,19 +17,33 @@ @PublicAPI(stability="alpha") def to_air_checkpoint( - path: str, booster: lightgbm.Booster, + *, + path: os.PathLike, preprocessor: Optional["Preprocessor"] = None, ) -> Checkpoint: """Convert a pretrained model to AIR checkpoint for serve or inference. + Example: + + .. code-block:: python + + import lightgbm + import tempfile + from ray.train.lightgbm import to_air_checkpoint, LightGBMPredictor + + bst = lightgbm.Booster() + with tempfile.TemporaryDirectory() as tmpdir: + checkpoint = to_air_checkpoint(booster=bst, path=tmpdir) + predictor = LightGBMPredictor.from_checkpoint(checkpoint) + Args: - path: The directory path where model and preprocessor steps are stored to. booster: A pretrained lightgbm model. + path: The directory where the checkpoint will be stored. preprocessor: A fitted preprocessor. The preprocessing logic will - be applied to serve/inference. + be applied to the inputs for serving/inference. Returns: - A Ray Air checkpoint. + A Ray AIR checkpoint. 
""" booster.save_model(os.path.join(path, MODEL_KEY)) diff --git a/python/ray/train/sklearn/utils.py b/python/ray/train/sklearn/utils.py index d652ec6e135e..79de2198f3d4 100644 --- a/python/ray/train/sklearn/utils.py +++ b/python/ray/train/sklearn/utils.py @@ -18,17 +18,31 @@ @PublicAPI(stability="alpha") def to_air_checkpoint( - path: str, estimator: BaseEstimator, + *, + path: os.PathLike, preprocessor: Optional["Preprocessor"] = None, ) -> Checkpoint: """Convert a pretrained model to AIR checkpoint for serve or inference. + Example: + + .. code-block:: python + + import tempfile + from sklearn.ensemble import RandomForestClassifier + from ray.train.sklearn import to_air_checkpoint, SklearnPredictor + + est = RandomForestClassifier() + with tempfile.TemporaryDirectory() as tmpdir: + checkpoint = to_air_checkpoint(estimator=est, path=tmpdir) + predictor = SklearnPredictor.from_checkpoint(checkpoint) + Args: - path: The directory path where model and preprocessor steps are stored to. estimator: A pretrained model. + path: The directory where the checkpoint will be stored to. preprocessor: A fitted preprocessor. The preprocessing logic will - be applied to serve/inference. + be applied to the inputs for serving/inference. Returns: A Ray Air checkpoint. """ diff --git a/python/ray/train/tensorflow/utils.py b/python/ray/train/tensorflow/utils.py index d75a95f1bd5a..7fe97ef806fb 100644 --- a/python/ray/train/tensorflow/utils.py +++ b/python/ray/train/tensorflow/utils.py @@ -14,14 +14,14 @@ @PublicAPI(stability="alpha") def to_air_checkpoint( - model: keras.Model, preprocessor: Optional["Preprocessor"] = None + model: keras.Model, *, preprocessor: Optional["Preprocessor"] = None ) -> Checkpoint: """Convert a pretrained model to AIR checkpoint for serve or inference. Args: model: A pretrained model. preprocessor: A fitted preprocessor. The preprocessing logic will - be applied to serve/inference. + be applied to the inputs for serving/inference. 
Returns: A Ray Air checkpoint. """ diff --git a/python/ray/train/tests/test_lightgbm_predictor.py b/python/ray/train/tests/test_lightgbm_predictor.py index 7428e94cb674..9009f4be006c 100644 --- a/python/ray/train/tests/test_lightgbm_predictor.py +++ b/python/ray/train/tests/test_lightgbm_predictor.py @@ -95,7 +95,7 @@ def test_predict_feature_columns_pandas(): def test_predict_no_preprocessor_no_training(): with tempfile.TemporaryDirectory() as tmpdir: - checkpoint = to_air_checkpoint(tmpdir, booster=model) + checkpoint = to_air_checkpoint(booster=model, path=tmpdir) predictor = LightGBMPredictor.from_checkpoint(checkpoint) data_batch = np.array([[1, 2], [3, 4], [5, 6]]) diff --git a/python/ray/train/tests/test_sklearn_predictor.py b/python/ray/train/tests/test_sklearn_predictor.py index a3c0f3cde9e4..9d3dcf25ee67 100644 --- a/python/ray/train/tests/test_sklearn_predictor.py +++ b/python/ray/train/tests/test_sklearn_predictor.py @@ -146,7 +146,7 @@ def test_batch_prediction_with_set_cpus(ray_start_4_cpus): def test_sklearn_predictor_no_training(): with tempfile.TemporaryDirectory() as tmpdir: - checkpoint = to_air_checkpoint(path=tmpdir, estimator=model) + checkpoint = to_air_checkpoint(estimator=model, path=tmpdir) batch_predictor = BatchPredictor.from_checkpoint(checkpoint, SklearnPredictor) test_dataset = ray.data.from_pandas( pd.DataFrame(dummy_data, columns=["A", "B"]) diff --git a/python/ray/train/tests/test_xgboost_predictor.py b/python/ray/train/tests/test_xgboost_predictor.py index 4c4dddaba252..21496a664727 100644 --- a/python/ray/train/tests/test_xgboost_predictor.py +++ b/python/ray/train/tests/test_xgboost_predictor.py @@ -97,7 +97,7 @@ def test_predict_feature_columns_pandas(): def test_predict_no_preprocessor_no_training(): with tempfile.TemporaryDirectory() as tmpdir: - checkpoint = to_air_checkpoint(tmpdir, booster=model) + checkpoint = to_air_checkpoint(booster=model, path=tmpdir) predictor = XGBoostPredictor.from_checkpoint(checkpoint) data_batch 
= np.array([[1, 2], [3, 4], [5, 6]]) diff --git a/python/ray/train/torch/utils.py b/python/ray/train/torch/utils.py index ae74503ac8c9..48be6011476d 100644 --- a/python/ray/train/torch/utils.py +++ b/python/ray/train/torch/utils.py @@ -14,14 +14,14 @@ @PublicAPI(stability="alpha") def to_air_checkpoint( - model: torch.nn.Module, preprocessor: Optional["Preprocessor"] = None + model: torch.nn.Module, *, preprocessor: Optional["Preprocessor"] = None ) -> Checkpoint: """Convert a pretrained model to AIR checkpoint for serve or inference. Args: model: A pretrained model. preprocessor: A fitted preprocessor. The preprocessing logic will - be applied to serve/inference. + be applied to the inputs for serving/inference. Returns: A Ray Air checkpoint. """ diff --git a/python/ray/train/xgboost/utils.py b/python/ray/train/xgboost/utils.py index 6dd7a2637575..929588fc8ceb 100644 --- a/python/ray/train/xgboost/utils.py +++ b/python/ray/train/xgboost/utils.py @@ -17,19 +17,34 @@ @PublicAPI(stability="alpha") def to_air_checkpoint( - path: str, booster: xgboost.Booster, + *, + path: os.PathLike, preprocessor: Optional["Preprocessor"] = None, ) -> Checkpoint: """Convert a pretrained model to AIR checkpoint for serve or inference. + Example: + + .. code-block:: python + + import xgboost + import tempfile + from ray.train.xgboost import to_air_checkpoint, XGBoostPredictor + + bst = xgboost.Booster() + with tempfile.TemporaryDirectory() as tmpdir: + checkpoint = to_air_checkpoint(booster=bst, path=tmpdir) + predictor = XGBoostPredictor.from_checkpoint(checkpoint) + Args: - path: The directory path where model and preprocessor steps are stored to. booster: A pretrained xgboost model. + path: The directory where the checkpoint will be stored. preprocessor: A fitted preprocessor. The preprocessing logic will - be applied to serve/inference. + be applied to the inputs for serving/inference. + Returns: - A Ray Air checkpoint. + A Ray AIR checkpoint. 
""" booster.save_model(os.path.join(path, MODEL_KEY))