From a6f91dabcdb5bbd14d974ec4c42d7028ba474d83 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 28 Aug 2024 18:20:20 -0700 Subject: [PATCH 1/3] [Data][Doc] Add tip about how to understand map_batches format --- python/ray/data/dataset.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py index eafaeb3a60fa..2a6bc56b43a2 100644 --- a/python/ray/data/dataset.py +++ b/python/ray/data/dataset.py @@ -408,6 +408,11 @@ def map_batches( stateful Ray actors. For more information, see :ref:`Stateful Transforms <stateful_transforms>`. + .. tip:: + For interactive development, to understand the format of the input to ``fn``, + you can call :meth:`~Dataset.take_batch` on the dataset to get a batch in the + same format as will be passed to ``fn``. + .. tip:: If ``fn`` doesn't mutate its input, set ``zero_copy_batch=True`` to improve performance and decrease memory utilization. @@ -554,6 +559,11 @@ def __call__(self, batch: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: :meth:`~Dataset.iter_batches` Call this function to iterate over batches of data. + :meth:`~Dataset.take_batch` + Call this function to get a batch of data from the dataset + in the same format as will be passed to the ``fn`` function of + :meth:`~Dataset.map_batches`. + :meth:`~Dataset.flat_map` Call this method to create new records from existing ones. 
Unlike :meth:`~Dataset.map`, a function passed to :meth:`~Dataset.flat_map` From 308597964f83f3f145edd442f85eea8c80e7d205 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Thu, 3 Oct 2024 11:35:24 -0700 Subject: [PATCH 2/3] Update python/ray/data/dataset.py Co-authored-by: Balaji Veeramani Signed-off-by: Philipp Moritz --- python/ray/data/dataset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py index ef6c6df89ce4..4e307a410082 100644 --- a/python/ray/data/dataset.py +++ b/python/ray/data/dataset.py @@ -411,9 +411,8 @@ def map_batches( .. tip:: For interactive development, to understand the format of the input to ``fn``, - you can call :meth:`~Dataset.take_batch` on the dataset to get a batch in the + call :meth:`~Dataset.take_batch` on the dataset to get a batch in the same format as will be passed to ``fn``. - .. tip:: If ``fn`` doesn't mutate its input, set ``zero_copy_batch=True`` to improve performance and decrease memory utilization. From 30c09eec2094912828d88a5846e14c65258c9b4d Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Thu, 3 Oct 2024 11:38:07 -0700 Subject: [PATCH 3/3] wording --- python/ray/data/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py index 4e307a410082..7f62566ff591 100644 --- a/python/ray/data/dataset.py +++ b/python/ray/data/dataset.py @@ -410,9 +410,9 @@ def map_batches( :ref:`Stateful Transforms <stateful_transforms>`. .. tip:: - For interactive development, to understand the format of the input to ``fn``, - call :meth:`~Dataset.take_batch` on the dataset to get a batch in the - same format as will be passed to ``fn``. + To understand the format of the input to ``fn``, call :meth:`~Dataset.take_batch` + on the dataset to get a batch in the same format as will be passed to ``fn``. + .. 
tip:: If ``fn`` doesn't mutate its input, set ``zero_copy_batch=True`` to improve performance and decrease memory utilization.