From 72992785ae0e4ffb86afdb7a29e9fc7ddac365f3 Mon Sep 17 00:00:00 2001 From: Yunxuan Xiao Date: Fri, 24 Feb 2023 16:30:45 -0800 Subject: [PATCH] [Doc] Add Configuring Batch Predictor section in Predictors User Guides (#32436) * [minor] add num_gpus_per_worker in BatchPredictor example Signed-off-by: Yunxuan Xiao * add configuring batch predictor section Signed-off-by: Yunxuan Xiao * Update doc/source/ray-air/doc_code/predictors.py Co-authored-by: Justin Yu Signed-off-by: Yunxuan Xiao * Update doc/source/ray-air/doc_code/predictors.py Co-authored-by: Justin Yu Signed-off-by: Yunxuan Xiao * fix typo Signed-off-by: Yunxuan Xiao * make examples into 3 subsections Signed-off-by: Yunxuan Xiao * split configure examples into 3 sections Signed-off-by: Yunxuan Xiao * modify BUILD file to acquire GPUs for ci workers Signed-off-by: Yunxuan Xiao * Change BUILD file to enable GPU tests Signed-off-by: Yunxuan Xiao * fix typo Signed-off-by: Yunxuan Xiao * not specify gpu and cpu at the same time Signed-off-by: Yunxuan Xiao * Update BUILD Signed-off-by: Yunxuan Xiao * resolve tf Blas error Signed-off-by: woshiyyya * resolve tf cublas error Signed-off-by: woshiyyya --------- Signed-off-by: Yunxuan Xiao Signed-off-by: Yunxuan Xiao Signed-off-by: Yunxuan Xiao Signed-off-by: woshiyyya Co-authored-by: Yunxuan Xiao Co-authored-by: Justin Yu Co-authored-by: Yunxuan Xiao Signed-off-by: Edward Oakes --- doc/BUILD | 18 +++++- doc/source/ray-air/doc_code/predictors.py | 37 +++++++++++ doc/source/ray-air/examples/BUILD | 11 +++- .../examples/torch_image_batch_pretrained.py | 2 +- doc/source/ray-air/predictors.rst | 62 +++++++++++++++++++ python/ray/train/batch_predictor.py | 3 + 6 files changed, 130 insertions(+), 3 deletions(-) diff --git a/doc/BUILD b/doc/BUILD index 45da5d3a659a..8417eb6c0e01 100644 --- a/doc/BUILD +++ b/doc/BUILD @@ -183,7 +183,10 @@ py_test_run_all_subdirectory( py_test_run_all_subdirectory( size = "large", include = ["source/ray-air/doc_code/*.py"], - exclude = 
["source/ray-air/doc_code/hf_trainer.py"], # Too large + exclude = [ + "source/ray-air/doc_code/hf_trainer.py", # Too large + "source/ray-air/doc_code/predictors.py", + ], extra_srcs = [], tags = ["exclusive", "team:ml"], ) @@ -211,3 +214,16 @@ py_test_run_all_subdirectory( extra_srcs = [], tags = ["exclusive", "team:ml"], ) + + +# -------------- +# Run GPU tests +# -------------- + +py_test_run_all_subdirectory( + size = "large", + include = ["source/ray-air/doc_code/predictors.py"], + exclude = [], + extra_srcs = [], + tags = ["exclusive", "team:ml", "ray_air", "gpu"], +) diff --git a/doc/source/ray-air/doc_code/predictors.py b/doc/source/ray-air/doc_code/predictors.py index a97802b73e8e..327d5a95cd4a 100644 --- a/doc/source/ray-air/doc_code/predictors.py +++ b/doc/source/ray-air/doc_code/predictors.py @@ -1,6 +1,11 @@ # flake8: noqa # isort: skip_file + +import os + +os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true" + # __use_predictor_start__ import numpy as np import tensorflow as tf @@ -76,6 +81,38 @@ def calculate_accuracy(df): # Final accuracy: 0.5 # __compute_accuracy_end__ + +# __configure_batch_predictor_cpu_only_start__ +predictions = batch_predictor.predict( + ds, + feature_columns=["feature_1"], + min_scoring_workers=2, + max_scoring_workers=2, + num_cpus_per_worker=3, +) +# __configure_batch_predictor_cpu_only_end__ + +# __configure_batch_predictor_gpu_only_start__ + +predictions = batch_predictor.predict( + ds, + feature_columns=["feature_1"], + min_scoring_workers=2, + max_scoring_workers=2, + num_gpus_per_worker=1, +) +# __configure_batch_predictor_gpu_only_end__ + +# __configure_batch_predictor_scaling_start__ +predictions = batch_predictor.predict( + ds, + feature_columns=["feature_1"], + min_scoring_workers=1, + max_scoring_workers=4, + num_cpus_per_worker=3, +) +# __configure_batch_predictor_scaling_end__ + # __pipelined_prediction_start__ import pandas as pd import ray diff --git a/doc/source/ray-air/examples/BUILD 
b/doc/source/ray-air/examples/BUILD index 19e4e1056ffb..f9ffb2e66c17 100644 --- a/doc/source/ray-air/examples/BUILD +++ b/doc/source/ray-air/examples/BUILD @@ -15,12 +15,21 @@ filegroup( py_test_run_all_subdirectory( size = "medium", include = ["*.py"], - exclude = [], + exclude = ["torch_image_batch_pretrained.py"], extra_srcs = [], data = ["//doc/source/ray-air/examples:air_examples"], tags = ["exclusive", "team:ml", "ray_air"], ) +py_test_run_all_subdirectory( + size = "medium", + include = ["torch_image_batch_pretrained.py"], + exclude = [], + extra_srcs = [], + data = ["//doc/source/ray-air/examples:air_examples"], + tags = ["exclusive", "team:ml", "ray_air", "gpu"], +) + # -------------------------------------------------------------------- # Test all doc/source/ray-air/examples notebooks. diff --git a/doc/source/ray-air/examples/torch_image_batch_pretrained.py b/doc/source/ray-air/examples/torch_image_batch_pretrained.py index a6fffe077845..ce3faff9b5bb 100644 --- a/doc/source/ray-air/examples/torch_image_batch_pretrained.py +++ b/doc/source/ray-air/examples/torch_image_batch_pretrained.py @@ -25,4 +25,4 @@ ckpt = TorchCheckpoint.from_model(model=model, preprocessor=preprocessor) predictor = BatchPredictor.from_checkpoint(ckpt, TorchPredictor) -predictor.predict(dataset, batch_size=80) +predictor.predict(dataset, batch_size=80, num_gpus_per_worker=1) diff --git a/doc/source/ray-air/predictors.rst b/doc/source/ray-air/predictors.rst index 99463c01b8f5..c0bd0b675a03 100644 --- a/doc/source/ray-air/predictors.rst +++ b/doc/source/ray-air/predictors.rst @@ -75,6 +75,68 @@ Additionally, you can compute metrics from the predictions. 
Do this by: :start-after: __compute_accuracy_start__ :end-before: __compute_accuracy_end__ + +Configuring Batch Prediction +---------------------------- +To configure the computation resources for your `BatchPredictor`, you can set the following parameters in `predict()`: + +- `min_scoring_workers` and `max_scoring_workers`: + + - The BatchPredictor will internally create an actor pool to autoscale the number of workers within the range [min, max] to execute your transforms. + + - If not set, the auto-scaling range will be set to [1, inf) by default. + +- `num_gpus_per_worker`: + + - If you want to use GPUs for batch prediction, please set this parameter explicitly. + + - If not specified, the BatchPredictor will perform inference on CPUs by default. + +- `num_cpus_per_worker`: + + - Set the number of CPUs for a worker. + +- `separate_gpu_stage`: + + - If using GPUs, whether to use separate stages for GPU inference and data preprocessing. + + - Enabled by default to avoid excessive preprocessing workload on GPU workers. You may disable it if your preprocessor is very lightweight. + +Here are some examples: + +**1. Use multiple CPUs for Batch Prediction:** + +- If `num_gpus_per_worker` is not specified, CPUs are used for batch prediction by default. + +- Two workers with 3 CPUs each. + +.. literalinclude:: doc_code/predictors.py + :language: python + :start-after: __configure_batch_predictor_cpu_only_start__ + :end-before: __configure_batch_predictor_cpu_only_end__ + +**2. Use multiple GPUs for Batch Prediction:** + +- Two workers, each with 1 GPU and 1 CPU (by default). + +.. literalinclude:: doc_code/predictors.py + :language: python + :start-after: __configure_batch_predictor_gpu_only_start__ + :end-before: __configure_batch_predictor_gpu_only_end__ + +**3. Configure Auto-scaling:** + +- Scale from 1 to 4 workers, depending on your dataset size and cluster resources. + +- If no min/max values are provided, `BatchPredictor` will scale from 1 to inf workers by default. + +.. 
literalinclude:: doc_code/predictors.py + :language: python + :start-after: __configure_batch_predictor_scaling_start__ + :end-before: __configure_batch_predictor_scaling_end__ + + + Batch Inference Examples ------------------------ Below, we provide examples of using common frameworks to do batch inference for different data types: diff --git a/python/ray/train/batch_predictor.py b/python/ray/train/batch_predictor.py index 678da9d401af..a9fe0da466ad 100644 --- a/python/ray/train/batch_predictor.py +++ b/python/ray/train/batch_predictor.py @@ -116,7 +116,10 @@ def predict( min_scoring_workers: Minimum number of scoring actors. max_scoring_workers: If set, specify the maximum number of scoring actors. num_cpus_per_worker: Number of CPUs to allocate per scoring worker. + Set to 1 by default. num_gpus_per_worker: Number of GPUs to allocate per scoring worker. + Set to 0 by default. If you want to use GPUs for inference, please + specify this parameter. separate_gpu_stage: If using GPUs, specifies whether to execute GPU processing in a separate stage (enabled by default). This avoids running expensive preprocessing steps on GPU workers.