From 82004b3dd959a10df69940ecbabd013339b86a85 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Sat, 28 Jan 2023 16:45:59 -0800 Subject: [PATCH] [Datasets] [Docs] Add `seealso` to map-related methods (#30579) This PR adds seealso notes to help users distinguish between map, flat_map, and map_batches. Signed-off-by: Balaji Veeramani Signed-off-by: Edward Oakes --- python/ray/data/dataset.py | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py index 84fca0f5b90d..48b3dff224b4 100644 --- a/python/ray/data/dataset.py +++ b/python/ray/data/dataset.py @@ -304,6 +304,20 @@ def map( must be used. ray_remote_args: Additional resource requirements to request from ray (e.g., num_gpus=1 to request GPUs for the map tasks). + + .. seealso:: + + :meth:`~Dataset.flat_map` + Call this method to create new records from existing ones. Unlike + :meth:`~Dataset.map`, a function passed to :meth:`~Dataset.flat_map` + can return multiple records. + + :meth:`~Dataset.flat_map` isn't recommended because it's slow; call + :meth:`~Dataset.map_batches` instead. + + :meth:`~Dataset.map_batches` + Call this method to transform batches of data. It's faster and more + flexible than :meth:`~Dataset.map` and :meth:`~Dataset.flat_map`. """ if isinstance(fn, CallableClass) and ( compute is None @@ -528,6 +542,20 @@ def map_batches( :meth:`~Dataset.default_batch_format` Call this function to determine the default batch type. + + :meth:`~Dataset.flat_map` + Call this method to create new records from existing ones. Unlike + :meth:`~Dataset.map`, a function passed to :meth:`~Dataset.flat_map` + can return multiple records. + + :meth:`~Dataset.flat_map` isn't recommended because it's slow; call + :meth:`~Dataset.map_batches` instead. + + :meth:`~Dataset.map` + Call this method to transform one record at a time. + + This method isn't recommended because it's slow; call + :meth:`~Dataset.map_batches` instead. 
""" # noqa: E501 if batch_format == "native": @@ -793,6 +821,18 @@ def flat_map( must be used. ray_remote_args: Additional resource requirements to request from ray (e.g., num_gpus=1 to request GPUs for the map tasks). + + .. seealso:: + + :meth:`~Dataset.map_batches` + Call this method to transform batches of data. It's faster and more + flexible than :meth:`~Dataset.map` and :meth:`~Dataset.flat_map`. + + :meth:`~Dataset.map` + Call this method to transform one record at a time. + + This method isn't recommended because it's slow; call + :meth:`~Dataset.map_batches` instead. """ if isinstance(fn, CallableClass) and ( compute is None