From 2c71cc4d543b00379792f0c89c72921b691611ff Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Thu, 20 Apr 2023 14:40:18 +0200 Subject: [PATCH 1/7] add design Signed-off-by: Max Pumperla --- doc/requirements-doc.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt index 4172bc287d96..3aa5e69bf7bb 100644 --- a/doc/requirements-doc.txt +++ b/doc/requirements-doc.txt @@ -65,6 +65,7 @@ sphinxcontrib-redoc==1.6.0 sphinx-tabs==3.4.0 sphinx-remove-toctrees==0.0.3 autodoc_pydantic==1.6.1 +sphinx_design==0.4.1 # MyST myst-parser==0.15.2 From beeef8d826a3d0f3d847bbe5f009877494835874 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Thu, 20 Apr 2023 14:40:29 +0200 Subject: [PATCH 2/7] activate design Signed-off-by: Max Pumperla --- doc/source/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/conf.py b/doc/source/conf.py index 28c50db32e28..5cdf1397494d 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -58,6 +58,7 @@ "sphinxcontrib.redoc", "sphinx_tabs.tabs", "sphinx_remove_toctrees", + "sphinx_design", ] # Prune deep toc-trees on demand for smaller html and faster builds. From 422ba6efa5aca60e61a8b9f9a6cebc9f226d8fcd Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Thu, 20 Apr 2023 14:40:44 +0200 Subject: [PATCH 3/7] air tabs Signed-off-by: Max Pumperla --- doc/source/ray-air/check-ingest.rst | 202 +++++++------- doc/source/ray-air/computer-vision.rst | 367 +++++++++++++------------ doc/source/ray-air/getting-started.rst | 127 +++++---- doc/source/ray-air/predictors.rst | 31 ++- doc/source/ray-air/trainers.rst | 24 +- doc/source/ray-air/tuner.rst | 22 +- 6 files changed, 402 insertions(+), 371 deletions(-) diff --git a/doc/source/ray-air/check-ingest.rst b/doc/source/ray-air/check-ingest.rst index 1716bd341080..6984c2fad7f0 100644 --- a/doc/source/ray-air/check-ingest.rst +++ b/doc/source/ray-air/check-ingest.rst @@ -74,57 +74,59 @@ Here are some examples of configuring Dataset ingest options and what they do: Enabling Streaming Ingest ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. tabbed:: Bulk Ingest +.. tab-set:: - By default, AIR loads all datasets into the Ray object store at the start of training. - This provides the best performance if the cluster can fit the datasets - entirely in memory, or if the preprocessing step is expensive to run more than once. + .. tab-item:: Bulk Ingest - .. literalinclude:: doc_code/air_ingest.py - :language: python - :start-after: __config_4__ - :end-before: __config_4_end__ + By default, AIR loads all datasets into the Ray object store at the start of training. + This provides the best performance if the cluster can fit the datasets + entirely in memory, or if the preprocessing step is expensive to run more than once. - You should use bulk ingest when: + .. literalinclude:: doc_code/air_ingest.py + :language: python + :start-after: __config_4__ + :end-before: __config_4_end__ - * you have enough memory to fit data blocks in cluster object store; or - * your preprocessing transform is expensive to recompute on each epoch + You should use bulk ingest when: -.. tabbed:: Streaming Ingest (experimental) + * you have enough memory to fit data blocks in cluster object store; or + * your preprocessing transform is expensive to recompute on each epoch - In streaming ingest mode, instead of loading the entire dataset into the - Ray object store at once, AIR will load a fraction of the dataset at a - time. This can be desirable when the dataset is very large, and caching it - all at once would cause expensive disk spilling. The downside is that the - dataset will have to be preprocessed on each epoch, which may be more - expensive. Preprocessing is overlapped with training computation, but - overall training throughput may still decrease if preprocessing is more - expensive than the training computation (forward pass, backward pass, - gradient sync). + .. tab-item:: Streaming Ingest (experimental) - To enable this mode, use the :py:meth:`max_object_store_memory_fraction - ` argument. This argument defaults to -1, - meaning that bulk ingest should be used and the entire dataset should be - computed and cached before training starts. + In streaming ingest mode, instead of loading the entire dataset into the + Ray object store at once, AIR will load a fraction of the dataset at a + time. This can be desirable when the dataset is very large, and caching it + all at once would cause expensive disk spilling. The downside is that the + dataset will have to be preprocessed on each epoch, which may be more + expensive. Preprocessing is overlapped with training computation, but + overall training throughput may still decrease if preprocessing is more + expensive than the training computation (forward pass, backward pass, + gradient sync). - Use a float value 0 or greater to indicate the "window" size, i.e. the - maximum fraction of object store memory that should be used at once. A - reasonable value is 0.2, meaning 20% of available object store memory. - Larger window sizes can improve performance by increasing parallelism. A - window size of 1 or greater will likely result in spilling. + To enable this mode, use the :py:meth:`max_object_store_memory_fraction + ` argument. This argument defaults to -1, + meaning that bulk ingest should be used and the entire dataset should be + computed and cached before training starts. - .. literalinclude:: doc_code/air_ingest.py - :language: python - :start-after: __config_5__ - :end-before: __config_5_end__ + Use a float value 0 or greater to indicate the "window" size, i.e. the + maximum fraction of object store memory that should be used at once. A + reasonable value is 0.2, meaning 20% of available object store memory. + Larger window sizes can improve performance by increasing parallelism. A + window size of 1 or greater will likely result in spilling. - Use streaming ingest when: + .. literalinclude:: doc_code/air_ingest.py + :language: python + :start-after: __config_5__ + :end-before: __config_5_end__ - * you have large datasets that don't fit into memory; and - * re-executing the preprocessing step on each epoch is faster than caching the preprocessed dataset on disk and reloading from disk on each epoch + Use streaming ingest when: - Note that this feature is experimental and the actual object store memory - usage may vary. Please file a `GitHub issue `_ if you run into problems. + * you have large datasets that don't fit into memory; and + * re-executing the preprocessing step on each epoch is faster than caching the preprocessed dataset on disk and reloading from disk on each epoch + + Note that this feature is experimental and the actual object store memory + usage may vary. Please file a `GitHub issue `_ if you run into problems. .. _air-shuffle: @@ -138,50 +140,52 @@ By default, AIR shuffles the assignment of data blocks (files) to dataset shards To randomize data records within a file, perform a local or global shuffle. -.. tabbed:: Local Shuffling +.. tab-set:: + + .. tab-item:: Local Shuffling - Local shuffling is the recommended approach for randomizing data order. To use local shuffle, - simply specify a non-zero ``local_shuffle_buffer_size`` as an argument to :meth:`~ray.data.DataIterator.iter_batches`. - The iterator will then use a local buffer of the given size to randomize record order. The - larger the buffer size, the more randomization will be applied, but it will also use more - memory. + Local shuffling is the recommended approach for randomizing data order. To use local shuffle, + simply specify a non-zero ``local_shuffle_buffer_size`` as an argument to :meth:`~ray.data.DataIterator.iter_batches`. + The iterator will then use a local buffer of the given size to randomize record order. The + larger the buffer size, the more randomization will be applied, but it will also use more + memory. - See :meth:`~ray.data.DataIterator.iter_batches` for more details. + See :meth:`~ray.data.DataIterator.iter_batches` for more details. - .. literalinclude:: doc_code/air_ingest.py - :language: python - :start-after: __local_shuffling_start__ - :end-before: __local_shuffling_end__ + .. literalinclude:: doc_code/air_ingest.py + :language: python + :start-after: __local_shuffling_start__ + :end-before: __local_shuffling_end__ - You should use local shuffling when: + You should use local shuffling when: - * a small in-memory buffer provides enough randomization; or - * you want the highest possible ingest performance; or - * your model is not overly sensitive to shuffle quality + * a small in-memory buffer provides enough randomization; or + * you want the highest possible ingest performance; or + * your model is not overly sensitive to shuffle quality -.. tabbed:: Global Shuffling (slower) + .. tab-item:: Global Shuffling (slower) - Global shuffling provides more uniformly random (decorrelated) samples and is carried - out via a distributed map-reduce operation. This higher quality shuffle can often lead - to more precision gain per training step, but it is also an expensive distributed - operation and will decrease the ingest throughput. The shuffle step is overlapped with - training computation, so as long as the shuffled ingest throughput matches - or exceeds the model training (forward pass, backward pass, gradient sync) - throughput, this higher-quality shuffle shouldn't slow down the overall - training. + Global shuffling provides more uniformly random (decorrelated) samples and is carried + out via a distributed map-reduce operation. This higher quality shuffle can often lead + to more precision gain per training step, but it is also an expensive distributed + operation and will decrease the ingest throughput. The shuffle step is overlapped with + training computation, so as long as the shuffled ingest throughput matches + or exceeds the model training (forward pass, backward pass, gradient sync) + throughput, this higher-quality shuffle shouldn't slow down the overall + training. - If global shuffling *is* causing the ingest throughput to become the training - bottleneck, local shuffling may be a better option. + If global shuffling *is* causing the ingest throughput to become the training + bottleneck, local shuffling may be a better option. - .. literalinclude:: doc_code/air_ingest.py - :language: python - :start-after: __global_shuffling_start__ - :end-before: __global_shuffling_end__ + .. literalinclude:: doc_code/air_ingest.py + :language: python + :start-after: __global_shuffling_start__ + :end-before: __global_shuffling_end__ - You should use global shuffling when: + You should use global shuffling when: - * you suspect high-quality shuffles may significantly improve model quality; and - * absolute ingest performance is less of a concern + * you suspect high-quality shuffles may significantly improve model quality; and + * absolute ingest performance is less of a concern .. _air-per-epoch-preprocessing: @@ -240,43 +244,45 @@ Dataset Resources Datasets uses Ray tasks to execute data processing operations. These tasks use CPU resources in the cluster during execution, which may compete with resources needed for Training. -.. tabbed:: Unreserved CPUs +.. tab-set:: + + .. tab-item:: Unreserved CPUs - By default, Dataset tasks use cluster CPU resources for execution. This can sometimes - conflict with Trainer resource requests. For example, if Trainers allocate all CPU resources - in the cluster, then no Datasets tasks can run. + By default, Dataset tasks use cluster CPU resources for execution. This can sometimes + conflict with Trainer resource requests. For example, if Trainers allocate all CPU resources + in the cluster, then no Datasets tasks can run. - .. literalinclude:: ./doc_code/air_ingest.py - :language: python - :start-after: __resource_allocation_1_begin__ - :end-before: __resource_allocation_1_end__ + .. literalinclude:: ./doc_code/air_ingest.py + :language: python + :start-after: __resource_allocation_1_begin__ + :end-before: __resource_allocation_1_end__ - Unreserved CPUs work well when: + Unreserved CPUs work well when: - * you are running only one Trainer and the cluster has enough CPUs; or - * your Trainers are configured to use GPUs and not CPUs + * you are running only one Trainer and the cluster has enough CPUs; or + * your Trainers are configured to use GPUs and not CPUs -.. tabbed:: Using Reserved CPUs (experimental) + .. tab-item:: Using Reserved CPUs (experimental) - The ``_max_cpu_fraction_per_node`` option can be used to exclude CPUs from placement - group scheduling. In the below example, setting this parameter to ``0.8`` enables Tune - trials to run smoothly without risk of deadlock by reserving 20% of node CPUs for - Dataset execution. + The ``_max_cpu_fraction_per_node`` option can be used to exclude CPUs from placement + group scheduling. In the below example, setting this parameter to ``0.8`` enables Tune + trials to run smoothly without risk of deadlock by reserving 20% of node CPUs for + Dataset execution. - .. literalinclude:: ./doc_code/air_ingest.py - :language: python - :start-after: __resource_allocation_2_begin__ - :end-before: __resource_allocation_2_end__ + .. literalinclude:: ./doc_code/air_ingest.py + :language: python + :start-after: __resource_allocation_2_begin__ + :end-before: __resource_allocation_2_end__ - You should use reserved CPUs when: + You should use reserved CPUs when: - * you are running multiple concurrent CPU Trainers using Tune; or - * you want to ensure predictable Datasets performance + * you are running multiple concurrent CPU Trainers using Tune; or + * you want to ensure predictable Datasets performance - .. warning:: + .. warning:: - ``_max_cpu_fraction_per_node`` is experimental and not currently recommended for use with - autoscaling clusters (scale-up will not trigger properly). + ``_max_cpu_fraction_per_node`` is experimental and not currently recommended for use with + autoscaling clusters (scale-up will not trigger properly). Debugging Ingest with the ``DummyTrainer`` ------------------------------------------ diff --git a/doc/source/ray-air/computer-vision.rst b/doc/source/ray-air/computer-vision.rst index 2fef456f0ee1..de89fd60b596 100644 --- a/doc/source/ray-air/computer-vision.rst +++ b/doc/source/ray-air/computer-vision.rst @@ -14,104 +14,106 @@ This guide explains how to perform common computer vision tasks like: Reading image data ------------------ -.. tabbed:: Raw images +.. tab-set:: - Datasets like ImageNet store files like this: + .. tab-item:: Raw images - .. code-block:: + Datasets like ImageNet store files like this: - root/dog/xxx.png - root/dog/xxy.png - root/dog/[...]/xxz.png + .. code-block:: - root/cat/123.png - root/cat/nsdf3.png - root/cat/[...]/asd932_.png + root/dog/xxx.png + root/dog/xxy.png + root/dog/[...]/xxz.png - To load images stored in this layout, read the raw images and include the - class names. + root/cat/123.png + root/cat/nsdf3.png + root/cat/[...]/asd932_.png - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __read_images1_start__ - :end-before: __read_images1_stop__ - :dedent: + To load images stored in this layout, read the raw images and include the + class names. - Then, apply a :ref:`user-defined function ` to - encode the class names as integer targets. + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __read_images1_start__ + :end-before: __read_images1_stop__ + :dedent: - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __read_images2_start__ - :end-before: __read_images2_stop__ - :dedent: + Then, apply a :ref:`user-defined function ` to + encode the class names as integer targets. - .. tip:: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __read_images2_start__ + :end-before: __read_images2_stop__ + :dedent: - You can also use :class:`~ray.data.preprocessors.LabelEncoder` to encode labels. + .. tip:: -.. tabbed:: NumPy + You can also use :class:`~ray.data.preprocessors.LabelEncoder` to encode labels. - To load NumPy arrays into a :class:`~ray.data.Datastream`, separately read the image and label arrays. + .. tab-item:: NumPy - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __read_numpy1_start__ - :end-before: __read_numpy1_stop__ - :dedent: + To load NumPy arrays into a :class:`~ray.data.Datastream`, separately read the image and label arrays. - Then, combine the datasets and rename the columns. + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __read_numpy1_start__ + :end-before: __read_numpy1_stop__ + :dedent: - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __read_numpy2_start__ - :end-before: __read_numpy2_stop__ - :dedent: + Then, combine the datasets and rename the columns. -.. tabbed:: TFRecords + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __read_numpy2_start__ + :end-before: __read_numpy2_stop__ + :dedent: - Image datasets often contain ``tf.train.Example`` messages that look like this: + .. tab-item:: TFRecords - .. code-block:: + Image datasets often contain ``tf.train.Example`` messages that look like this: - features { - feature { - key: "image" - value { - bytes_list { - value: ... # Raw image bytes + .. code-block:: + + features { + feature { + key: "image" + value { + bytes_list { + value: ... # Raw image bytes + } } } - } - feature { - key: "label" - value { - int64_list { - value: 3 + feature { + key: "label" + value { + int64_list { + value: 3 + } } } } - } - To load examples stored in this format, read the TFRecords into a :class:`~ray.data.Datastream`. + To load examples stored in this format, read the TFRecords into a :class:`~ray.data.Datastream`. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __read_tfrecords1_start__ - :end-before: __read_tfrecords1_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __read_tfrecords1_start__ + :end-before: __read_tfrecords1_stop__ + :dedent: - Then, apply a :ref:`user-defined function ` to - decode the raw image bytes. + Then, apply a :ref:`user-defined function ` to + decode the raw image bytes. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __read_tfrecords2_start__ - :end-before: __read_tfrecords2_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __read_tfrecords2_start__ + :end-before: __read_tfrecords2_stop__ + :dedent: -.. tabbed:: Parquet + .. tab-item:: Parquet - To load image data stored in Parquet files, call :func:`ray.data.read_parquet`. + To load image data stored in Parquet files, call :func:`ray.data.read_parquet`. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __read_parquet_start__ - :end-before: __read_parquet_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __read_parquet_start__ + :end-before: __read_parquet_stop__ + :dedent: For more information on creating datastreams, see :ref:`Creating Datastreams `. @@ -123,33 +125,35 @@ Transforming images To transform images, create a :class:`~ray.data.preprocessor.Preprocessor`. They're the standard way to preprocess data with Ray. -.. tabbed:: Torch +.. tab-set:: + + .. tab-item:: Torch - To apply TorchVision transforms, create a :class:`~ray.data.preprocessors.TorchVisionPreprocessor`. + To apply TorchVision transforms, create a :class:`~ray.data.preprocessors.TorchVisionPreprocessor`. - Create two :class:`TorchVisionPreprocessors ` - -- one to normalize images, and another to augment images. Later, you'll pass the preprocessors to :class:`Trainers `, - :class:`Predictors `, and - :class:`PredictorDeployments `. + Create two :class:`TorchVisionPreprocessors ` + -- one to normalize images, and another to augment images. Later, you'll pass the preprocessors to :class:`Trainers `, + :class:`Predictors `, and + :class:`PredictorDeployments `. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __torch_preprocessors_start__ - :end-before: __torch_preprocessors_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __torch_preprocessors_start__ + :end-before: __torch_preprocessors_stop__ + :dedent: -.. tabbed:: TensorFlow + .. tab-item:: TensorFlow - To apply TorchVision transforms, create a :class:`~ray.data.preprocessors.BatchMapper`. + To apply TorchVision transforms, create a :class:`~ray.data.preprocessors.BatchMapper`. - Create two :class:`~ray.data.preprocessors.BatchMapper` -- one to normalize images, and another to - augment images. Later, you'll pass the preprocessors to :class:`Trainers `, - :class:`Predictors `, and - :class:`PredictorDeployments `. + Create two :class:`~ray.data.preprocessors.BatchMapper` -- one to normalize images, and another to + augment images. Later, you'll pass the preprocessors to :class:`Trainers `, + :class:`Predictors `, and + :class:`PredictorDeployments `. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __tensorflow_preprocessors_start__ - :end-before: __tensorflow_preprocessors_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __tensorflow_preprocessors_start__ + :end-before: __tensorflow_preprocessors_stop__ + :dedent: For more information on transforming data, see :ref:`Using Preprocessors ` and @@ -160,44 +164,46 @@ Training vision models :class:`Trainers ` let you train models in parallel. -.. tabbed:: Torch +.. tab-set:: - To train a vision model, define the training loop per worker. + .. tab-item:: Torch - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __torch_training_loop_start__ - :end-before: __torch_training_loop_stop__ - :dedent: + To train a vision model, define the training loop per worker. - Then, create a :class:`~ray.train.torch.TorchTrainer` and call - :meth:`~ray.train.torch.TorchTrainer.fit`. + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __torch_training_loop_start__ + :end-before: __torch_training_loop_stop__ + :dedent: - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __torch_trainer_start__ - :end-before: __torch_trainer_stop__ - :dedent: + Then, create a :class:`~ray.train.torch.TorchTrainer` and call + :meth:`~ray.train.torch.TorchTrainer.fit`. - For more in-depth examples, read :doc:`/ray-air/examples/torch_image_example` and - :ref:`Using Trainers `. + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __torch_trainer_start__ + :end-before: __torch_trainer_stop__ + :dedent: -.. tabbed:: TensorFlow + For more in-depth examples, read :doc:`/ray-air/examples/torch_image_example` and + :ref:`Using Trainers `. - To train a vision model, define the training loop per worker. + .. tab-item:: TensorFlow - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __tensorflow_training_loop_start__ - :end-before: __tensorflow_training_loop_stop__ - :dedent: + To train a vision model, define the training loop per worker. - Then, create a :class:`~ray.train.tensorflow.TensorflowTrainer` and call - :meth:`~ray.train.tensorflow.TensorflowTrainer.fit`. + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __tensorflow_training_loop_start__ + :end-before: __tensorflow_training_loop_stop__ + :dedent: - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __tensorflow_trainer_start__ - :end-before: __tensorflow_trainer_stop__ - :dedent: + Then, create a :class:`~ray.train.tensorflow.TensorflowTrainer` and call + :meth:`~ray.train.tensorflow.TensorflowTrainer.fit`. - For more information, read :ref:`Using Trainers `. + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __tensorflow_trainer_start__ + :end-before: __tensorflow_trainer_stop__ + :dedent: + + For more information, read :ref:`Using Trainers `. Creating checkpoints -------------------- @@ -210,27 +216,29 @@ If you're going from training to prediction, don't create a new checkpoint. :class:`~ray.air.result.Result` object. Use :attr:`Result.checkpoint ` instead. -.. tabbed:: Torch +.. tab-set:: + + .. tab-item:: Torch - To create a :class:`~ray.train.torch.TorchCheckpoint`, pass a Torch model and - the :class:`~ray.data.preprocessor.Preprocessor` you created in `Transforming images`_ - to :meth:`TorchCheckpoint.from_model() `. + To create a :class:`~ray.train.torch.TorchCheckpoint`, pass a Torch model and + the :class:`~ray.data.preprocessor.Preprocessor` you created in `Transforming images`_ + to :meth:`TorchCheckpoint.from_model() `. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __torch_checkpoint_start__ - :end-before: __torch_checkpoint_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __torch_checkpoint_start__ + :end-before: __torch_checkpoint_stop__ + :dedent: -.. tabbed:: TensorFlow + .. tab-item:: TensorFlow - To create a :class:`~ray.train.tensorflow.TensorflowCheckpoint`, pass a TensorFlow model and - the :class:`~ray.data.preprocessor.Preprocessor` you created in `Transforming images`_ - to :meth:`TensorflowCheckpoint.from_model() `. + To create a :class:`~ray.train.tensorflow.TensorflowCheckpoint`, pass a TensorFlow model and + the :class:`~ray.data.preprocessor.Preprocessor` you created in `Transforming images`_ + to :meth:`TensorflowCheckpoint.from_model() `. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __tensorflow_checkpoint_start__ - :end-before: __tensorflow_checkpoint_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __tensorflow_checkpoint_start__ + :end-before: __tensorflow_checkpoint_stop__ + :dedent: Batch predicting images @@ -239,32 +247,34 @@ Batch predicting images :class:`~ray.train.batch_predictor.BatchPredictor` lets you perform inference on large image datasets. -.. tabbed:: Torch +.. tab-set:: + + .. tab-item:: Torch - To create a :class:`~ray.train.batch_predictor.BatchPredictor`, call - :meth:`BatchPredictor.from_checkpoint ` and pass the checkpoint - you created in `Creating checkpoints`_. + To create a :class:`~ray.train.batch_predictor.BatchPredictor`, call + :meth:`BatchPredictor.from_checkpoint ` and pass the checkpoint + you created in `Creating checkpoints`_. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __torch_batch_predictor_start__ - :end-before: __torch_batch_predictor_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __torch_batch_predictor_start__ + :end-before: __torch_batch_predictor_stop__ + :dedent: - For more in-depth examples, read :doc:`/ray-air/examples/pytorch_resnet_batch_prediction` - and :ref:`Using Predictors for Inference `. + For more in-depth examples, read :doc:`/ray-air/examples/pytorch_resnet_batch_prediction` + and :ref:`Using Predictors for Inference `. -.. tabbed:: TensorFlow + .. tab-item:: TensorFlow - To create a :class:`~ray.train.batch_predictor.BatchPredictor`, call - :meth:`BatchPredictor.from_checkpoint ` and pass the checkpoint - you created in `Creating checkpoints`_. + To create a :class:`~ray.train.batch_predictor.BatchPredictor`, call + :meth:`BatchPredictor.from_checkpoint ` and pass the checkpoint + you created in `Creating checkpoints`_. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __tensorflow_batch_predictor_start__ - :end-before: __tensorflow_batch_predictor_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __tensorflow_batch_predictor_start__ + :end-before: __tensorflow_batch_predictor_stop__ + :dedent: - For more information, read :ref:`Using Predictors for Inference `. + For more information, read :ref:`Using Predictors for Inference `. Serving vision models --------------------- @@ -286,44 +296,45 @@ To NumPy ndarrays like this: array([[1., 2.], [3., 4.]]) +.. tab-set:: -.. tabbed:: Torch + .. tab-item:: Torch - To deploy a Torch model to an endpoint, pass the checkpoint you created in `Creating checkpoints`_ - to :meth:`PredictorDeployment.bind ` and specify - :func:`~ray.serve.http_adapters.json_to_ndarray` as the HTTP adapter. + To deploy a Torch model to an endpoint, pass the checkpoint you created in `Creating checkpoints`_ + to :meth:`PredictorDeployment.bind ` and specify + :func:`~ray.serve.http_adapters.json_to_ndarray` as the HTTP adapter. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __torch_serve_start__ - :end-before: __torch_serve_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __torch_serve_start__ + :end-before: __torch_serve_stop__ + :dedent: - Then, make a request to classify an image. + Then, make a request to classify an image. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __torch_online_predict_start__ - :end-before: __torch_online_predict_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __torch_online_predict_start__ + :end-before: __torch_online_predict_stop__ + :dedent: - For more in-depth examples, read :doc:`/ray-air/examples/torch_image_example` - and :doc:`/ray-air/examples/serving_guide`. + For more in-depth examples, read :doc:`/ray-air/examples/torch_image_example` + and :doc:`/ray-air/examples/serving_guide`. -.. tabbed:: TensorFlow + .. tab-item:: TensorFlow - To deploy a TensorFlow model to an endpoint, pass the checkpoint you created in `Creating checkpoints`_ - to :meth:`PredictorDeployment.bind ` and specify - :func:`~ray.serve.http_adapters.json_to_multi_ndarray` as the HTTP adapter. + To deploy a TensorFlow model to an endpoint, pass the checkpoint you created in `Creating checkpoints`_ + to :meth:`PredictorDeployment.bind ` and specify + :func:`~ray.serve.http_adapters.json_to_multi_ndarray` as the HTTP adapter. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __tensorflow_serve_start__ - :end-before: __tensorflow_serve_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __tensorflow_serve_start__ + :end-before: __tensorflow_serve_stop__ + :dedent: - Then, make a request to classify an image. + Then, make a request to classify an image. - .. literalinclude:: ./doc_code/computer_vision.py - :start-after: __tensorflow_online_predict_start__ - :end-before: __tensorflow_online_predict_stop__ - :dedent: + .. literalinclude:: ./doc_code/computer_vision.py + :start-after: __tensorflow_online_predict_start__ + :end-before: __tensorflow_online_predict_stop__ + :dedent: - For more information, read :doc:`/ray-air/examples/serving_guide`. + For more information, read :doc:`/ray-air/examples/serving_guide`. diff --git a/doc/source/ray-air/getting-started.rst b/doc/source/ray-air/getting-started.rst index aa998f69dc71..abb5fc29068a 100644 --- a/doc/source/ray-air/getting-started.rst +++ b/doc/source/ray-air/getting-started.rst @@ -84,78 +84,84 @@ First, let's start by loading a dataset from storage: Then, we define a ``Preprocessor`` pipeline for our task: -.. tabbed:: XGBoost +.. tab-set:: - .. literalinclude:: examples/xgboost_starter.py - :language: python - :start-after: __air_xgb_preprocess_start__ - :end-before: __air_xgb_preprocess_end__ + .. tab-item:: XGBoost -.. tabbed:: Pytorch + .. literalinclude:: examples/xgboost_starter.py + :language: python + :start-after: __air_xgb_preprocess_start__ + :end-before: __air_xgb_preprocess_end__ - .. literalinclude:: examples/pytorch_tabular_starter.py - :language: python - :start-after: __air_pytorch_preprocess_start__ - :end-before: __air_pytorch_preprocess_end__ + .. tab-item:: Pytorch -.. tabbed:: Tensorflow + .. literalinclude:: examples/pytorch_tabular_starter.py + :language: python + :start-after: __air_pytorch_preprocess_start__ + :end-before: __air_pytorch_preprocess_end__ - .. literalinclude:: examples/tf_tabular_starter.py - :language: python - :start-after: __air_tf_preprocess_start__ - :end-before: __air_tf_preprocess_end__ + .. tab-item:: Tensorflow + + .. literalinclude:: examples/tf_tabular_starter.py + :language: python + :start-after: __air_tf_preprocess_start__ + :end-before: __air_tf_preprocess_end__ Training ~~~~~~~~ Train a model with a ``Trainer`` with common ML frameworks: -.. tabbed:: XGBoost +.. tab-set:: + + .. tab-item:: XGBoost - .. literalinclude:: examples/xgboost_starter.py - :language: python - :start-after: __air_xgb_train_start__ - :end-before: __air_xgb_train_end__ + .. literalinclude:: examples/xgboost_starter.py + :language: python + :start-after: __air_xgb_train_start__ + :end-before: __air_xgb_train_end__ -.. tabbed:: Pytorch + .. tab-item:: Pytorch - .. literalinclude:: examples/pytorch_tabular_starter.py - :language: python - :start-after: __air_pytorch_train_start__ - :end-before: __air_pytorch_train_end__ + .. literalinclude:: examples/pytorch_tabular_starter.py + :language: python + :start-after: __air_pytorch_train_start__ + :end-before: __air_pytorch_train_end__ -.. tabbed:: Tensorflow + .. tab-item:: Tensorflow - .. literalinclude:: examples/tf_tabular_starter.py - :language: python - :start-after: __air_tf_train_start__ - :end-before: __air_tf_train_end__ + .. literalinclude:: examples/tf_tabular_starter.py + :language: python + :start-after: __air_tf_train_start__ + :end-before: __air_tf_train_end__ Hyperparameter Tuning ~~~~~~~~~~~~~~~~~~~~~ You can specify a hyperparameter space to search over for each trainer: -.. tabbed:: XGBoost +.. tab-set:: + + .. tab-item:: XGBoost - .. literalinclude:: examples/xgboost_starter.py - :language: python - :start-after: __air_xgb_tuner_start__ - :end-before: __air_xgb_tuner_end__ + .. literalinclude:: examples/xgboost_starter.py + :language: python + :start-after: __air_xgb_tuner_start__ + :end-before: __air_xgb_tuner_end__ -.. tabbed:: Pytorch + .. tab-item:: Pytorch - .. literalinclude:: examples/pytorch_tabular_starter.py - :language: python - :start-after: __air_pytorch_tuner_start__ - :end-before: __air_pytorch_tuner_end__ + .. literalinclude:: examples/pytorch_tabular_starter.py + :language: python + :start-after: __air_pytorch_tuner_start__ + :end-before: __air_pytorch_tuner_end__ -.. tabbed:: Tensorflow + .. tab-item:: Tensorflow - .. literalinclude:: examples/tf_tabular_starter.py - :language: python - :start-after: __air_tf_tuner_start__ - :end-before: __air_tf_tuner_end__ + .. literalinclude:: examples/tf_tabular_starter.py + :language: python + :start-after: __air_tf_tuner_start__ + :end-before: __air_tf_tuner_end__ Then use the ``Tuner`` to run the search: @@ -169,27 +175,28 @@ Batch Inference Use the trained model for scalable batch prediction with a ``BatchPredictor``. -.. tabbed:: XGBoost +.. tab-set:: - .. literalinclude:: examples/xgboost_starter.py - :language: python - :start-after: __air_xgb_batchpred_start__ - :end-before: __air_xgb_batchpred_end__ + .. tab-item:: XGBoost -.. tabbed:: Pytorch + .. literalinclude:: examples/xgboost_starter.py + :language: python + :start-after: __air_xgb_batchpred_start__ + :end-before: __air_xgb_batchpred_end__ - .. literalinclude:: examples/pytorch_tabular_starter.py - :language: python - :start-after: __air_pytorch_batchpred_start__ - :end-before: __air_pytorch_batchpred_end__ + .. tab-item:: Pytorch -.. tabbed:: Tensorflow + .. literalinclude:: examples/pytorch_tabular_starter.py + :language: python + :start-after: __air_pytorch_batchpred_start__ + :end-before: __air_pytorch_batchpred_end__ - .. literalinclude:: examples/tf_tabular_starter.py - :language: python - :start-after: __air_tf_batchpred_start__ - :end-before: __air_tf_batchpred_end__ + .. tab-item:: Tensorflow + .. literalinclude:: examples/tf_tabular_starter.py + :language: python + :start-after: __air_tf_batchpred_start__ + :end-before: __air_tf_batchpred_end__ Project Status -------------- diff --git a/doc/source/ray-air/predictors.rst b/doc/source/ray-air/predictors.rst index e281e18554d3..16c1545d45d8 100644 --- a/doc/source/ray-air/predictors.rst +++ b/doc/source/ray-air/predictors.rst @@ -146,34 +146,37 @@ Below, we provide examples of using common frameworks to do batch inference for Tabular ~~~~~~~ -.. tabbed:: XGBoost +.. tab-set:: - .. literalinclude:: examples/xgboost_batch_prediction.py - :language: python + .. tab-item:: XGBoost -.. tabbed:: Pytorch + .. literalinclude:: examples/xgboost_batch_prediction.py + :language: python - .. literalinclude:: examples/pytorch_tabular_batch_prediction.py - :language: python + .. tab-item:: Pytorch -.. tabbed:: Tensorflow + .. literalinclude:: examples/pytorch_tabular_batch_prediction.py + :language: python - .. literalinclude:: examples/tf_tabular_batch_prediction.py - :language: python + .. tab-item:: Tensorflow + .. literalinclude:: examples/tf_tabular_batch_prediction.py + :language: python Image ~~~~~ -.. tabbed:: Pytorch +.. tab-set:: - .. literalinclude:: examples/torch_image_batch_pretrained.py - :language: python + .. tab-item:: Pytorch + + .. literalinclude:: examples/torch_image_batch_pretrained.py + :language: python -.. tabbed:: Tensorflow + .. tab-item:: Tensorflow - Coming soon! + Coming soon! Text ~~~~ diff --git a/doc/source/ray-air/trainers.rst b/doc/source/ray-air/trainers.rst index ea08fd7352b2..9022ed097ad4 100644 --- a/doc/source/ray-air/trainers.rst +++ b/doc/source/ray-air/trainers.rst @@ -67,22 +67,24 @@ Read more about :ref:`Ray Train's Deep Learning Trainers `. .. dropdown:: Code examples - .. tabbed:: Torch + .. tab-set:: - .. literalinclude:: doc_code/torch_trainer.py - :language: python + .. tab-item:: Torch - .. tabbed:: Tensorflow + .. literalinclude:: doc_code/torch_trainer.py + :language: python - .. literalinclude:: doc_code/tf_starter.py - :language: python - :start-after: __air_tf_train_start__ - :end-before: __air_tf_train_end__ + .. tab-item:: Tensorflow - .. tabbed:: Horovod + .. literalinclude:: doc_code/tf_starter.py + :language: python + :start-after: __air_tf_train_start__ + :end-before: __air_tf_train_end__ - .. literalinclude:: doc_code/hvd_trainer.py - :language: python + .. tab-item:: Horovod + + .. literalinclude:: doc_code/hvd_trainer.py + :language: python How to report metrics and checkpoints? diff --git a/doc/source/ray-air/tuner.rst b/doc/source/ray-air/tuner.rst index 3bba0b1d47a6..43f14c470c0f 100644 --- a/doc/source/ray-air/tuner.rst +++ b/doc/source/ray-air/tuner.rst @@ -63,19 +63,21 @@ Depending on the model and dataset, you may want to tune: The following shows some example code on how to specify the ``param_space``. -.. tabbed:: XGBoost +.. tab-set:: - .. literalinclude:: doc_code/tuner.py - :language: python - :start-after: __xgboost_start__ - :end-before: __xgboost_end__ + .. tab-item:: XGBoost -.. tabbed:: Pytorch + .. literalinclude:: doc_code/tuner.py + :language: python + :start-after: __xgboost_start__ + :end-before: __xgboost_end__ - .. literalinclude:: doc_code/tuner.py - :language: python - :start-after: __torch_start__ - :end-before: __torch_end__ + .. tab-item:: Pytorch + + .. literalinclude:: doc_code/tuner.py + :language: python + :start-after: __torch_start__ + :end-before: __torch_end__ Read more about :ref:`Tune search spaces here `. From 11db5b5e5ef3093ffd32ea05f47ab174a537d79d Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Thu, 20 Apr 2023 16:00:30 +0200 Subject: [PATCH 4/7] grids and buttons Signed-off-by: Max Pumperla --- doc/source/data/examples/index.rst | 67 +++++++++++++++++++----------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/doc/source/data/examples/index.rst b/doc/source/data/examples/index.rst index 62750e4885eb..8d780367fff8 100644 --- a/doc/source/data/examples/index.rst +++ b/doc/source/data/examples/index.rst @@ -17,35 +17,52 @@ modalities and types. Here you will find a few end-to-end examples of some basic processing with Ray Data on tabular data, text (coming soon!), and imagery (coming soon!). -.. panels:: - :container: container pb-4 - :column: col-md-4 px-2 py-2 - :img-top-cls: pt-5 w-75 d-block mx-auto +.. grid:: 3 + :gutter: 2 + :class-container: container pb-4 - --- - :img-top: /images/taxi.png + .. grid-item-card:: + :img-top: /images/taxi.png + :class-img-top: pt-5 w-75 d-block mx-auto - +++ - .. link-button:: nyc_taxi_basic_processing - :type: ref - :text: Processing the NYC taxi dataset - :classes: btn-link btn-block stretched-link - --- - :img-top: /images/taxi.png + +++ + .. button-ref:: nyc_taxi_basic_processing + :ref-type: ref + :color: primary + :outline: + :expand: + :click-parent: - +++ - .. link-button:: batch_training - :type: ref - :text: Batch Training with Ray Data - :classes: btn-link btn-block stretched-link - --- - :img-top: /images/ocr.jpg + Processing the NYC taxi dataset + + .. grid-item-card:: + :img-top: /images/taxi.png + :class-img-top: pt-5 w-75 d-block mx-auto + + +++ + .. button-ref:: batch_training + :ref-type: ref + :color: primary + :outline: + :expand: + :click-parent: + + Batch Training with Ray Data + + .. grid-item-card:: + :img-top: /images/ocr.jpg + :class-img-top: pt-5 w-75 d-block mx-auto + + +++ + .. button-ref:: ocr_example + :ref-type: ref + :color: primary + :outline: + :expand: + :click-parent: + + Scaling OCR with Ray Data - +++ - .. link-button:: ocr_example - :type: ref - :text: Scaling OCR with Ray Data - :classes: btn-link btn-block stretched-link Other Examples From ee1dbcd6481aa899d583cde257000036336b3f8c Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Thu, 20 Apr 2023 16:15:39 +0200 Subject: [PATCH 5/7] wrap up poc Signed-off-by: Max Pumperla --- doc/source/data/examples/index.rst | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/doc/source/data/examples/index.rst b/doc/source/data/examples/index.rst index 8d780367fff8..0f815bd224f7 100644 --- a/doc/source/data/examples/index.rst +++ b/doc/source/data/examples/index.rst @@ -27,11 +27,10 @@ soon!). +++ .. button-ref:: nyc_taxi_basic_processing - :ref-type: ref + :ref-type: doc :color: primary :outline: :expand: - :click-parent: Processing the NYC taxi dataset @@ -41,11 +40,10 @@ soon!). +++ .. button-ref:: batch_training - :ref-type: ref + :ref-type: doc :color: primary :outline: :expand: - :click-parent: Batch Training with Ray Data @@ -55,11 +53,10 @@ soon!). +++ .. button-ref:: ocr_example - :ref-type: ref + :ref-type: doc :color: primary :outline: :expand: - :click-parent: Scaling OCR with Ray Data @@ -68,16 +65,20 @@ soon!). Other Examples -------------- -.. panels:: - :container: container pb-4 - :column: col-md-4 px-2 py-2 - :img-top-cls: pt-5 w-75 d-block mx-auto - --- - :img-top: ../images/datastream-arch.svg +.. grid:: 3 + :gutter: 2 + :class-container: container pb-4 + + .. grid-item-card:: + :img-top: ../images/datastream-arch.svg + :class-img-top: pt-5 w-75 d-block mx-auto + + +++ + .. button-ref:: random-access + :ref-type: doc + :color: primary + :outline: + :expand: - +++ - .. link-button:: random-access - :type: ref - :text: Random Data Access (Experimental) - :classes: btn-link btn-block stretched-link + Random Data Access (Experimental) From 6c20c3f7482d1dce276139694b80b7d3614c17ed Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Fri, 21 Apr 2023 10:48:35 +0200 Subject: [PATCH 6/7] relax build reqs for now Signed-off-by: Max Pumperla --- ci/ci.sh | 2 +- doc/source/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/ci.sh b/ci/ci.sh index 47ae6224b830..136928b92604 100755 --- a/ci/ci.sh +++ b/ci/ci.sh @@ -308,7 +308,7 @@ build_sphinx_docs() { if [ "${OSTYPE}" = msys ]; then echo "WARNING: Documentation not built on Windows due to currently-unresolved issues" else - FAST=True make html + FAST=True make develop pip install datasets==2.0.0 RAY_MOCK_MODULES=0 RAY_DEDUP_LOGS=0 make doctest fi diff --git a/doc/source/conf.py b/doc/source/conf.py index 5cdf1397494d..1dbaeea85714 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -38,7 +38,6 @@ extensions = [ "callouts", # custom extension from _ext folder - "sphinx_panels", "sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx.ext.napoleon", @@ -58,6 +57,7 @@ "sphinxcontrib.redoc", "sphinx_tabs.tabs", "sphinx_remove_toctrees", + "sphinx_panels", "sphinx_design", ] From 59f4df7d1dc55f80a21ee114e837caf12f8eaf38 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Fri, 21 Apr 2023 14:30:13 +0200 Subject: [PATCH 7/7] add todo Signed-off-by: Max Pumperla --- ci/ci.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/ci.sh b/ci/ci.sh index 136928b92604..98071a8bfeaa 100755 --- a/ci/ci.sh +++ b/ci/ci.sh @@ -308,6 +308,7 @@ build_sphinx_docs() { if [ "${OSTYPE}" = msys ]; then echo "WARNING: Documentation not built on Windows due to currently-unresolved issues" else + # TODO: revert to "make html" once "sphinx_panels" plugin is fully removed. FAST=True make develop pip install datasets==2.0.0 RAY_MOCK_MODULES=0 RAY_DEDUP_LOGS=0 make doctest