From eb37bb857c2cce9d91ac8c7b2bf6c2a9d34ee65b Mon Sep 17 00:00:00 2001
From: Jiajun Yao
Date: Mon, 15 Aug 2022 09:24:34 -0700
Subject: [PATCH] Revamp ray core design patterns doc [1/n]: generators (#27823)

- Move the code snippet to doc_code folder
- Move patterns to an upper level.

Signed-off-by: Jiajun Yao
---
 .../ray-core/doc_code/pattern_generators.py  | 61 ++++++++++++
 doc/source/ray-core/patterns/generators.rst  | 48 ++++++++++
 doc/source/ray-core/patterns/index.rst       | 11 +++
 .../ray-core/tasks/patterns/generators.rst   | 96 -------------------
 doc/source/ray-core/tasks/patterns/index.rst |  1 -
 doc/source/ray-core/user-guide.rst           |  1 +
 6 files changed, 121 insertions(+), 97 deletions(-)
 create mode 100644 doc/source/ray-core/doc_code/pattern_generators.py
 create mode 100644 doc/source/ray-core/patterns/generators.rst
 create mode 100644 doc/source/ray-core/patterns/index.rst
 delete mode 100644 doc/source/ray-core/tasks/patterns/generators.rst

diff --git a/doc/source/ray-core/doc_code/pattern_generators.py b/doc/source/ray-core/doc_code/pattern_generators.py
new file mode 100644
index 000000000000..3badee26cf1b
--- /dev/null
+++ b/doc/source/ray-core/doc_code/pattern_generators.py
@@ -0,0 +1,61 @@
+import sys
+
+if len(sys.argv) == 1:
+    # A small number for CI test.
+    sys.argv.append("5")
+
+# __program_start__
+import sys
+import ray
+
+# fmt: off
+# __large_values_start__
+import numpy as np
+
+
+@ray.remote
+def large_values(num_returns):
+    return [
+        np.random.randint(np.iinfo(np.int8).max, size=(100_000_000, 1), dtype=np.int8)
+        for _ in range(num_returns)
+    ]
+# __large_values_end__
+# fmt: on
+
+
+# fmt: off
+# __large_values_generator_start__
+@ray.remote
+def large_values_generator(num_returns):
+    for i in range(num_returns):
+        yield np.random.randint(
+            np.iinfo(np.int8).max, size=(100_000_000, 1), dtype=np.int8
+        )
+        print(f"yielded return value {i}")
+# __large_values_generator_end__
+# fmt: on
+
+
+# A large enough value (e.g. 100).
+num_returns = int(sys.argv[1])
+# Worker will likely OOM using normal returns.
+print("Using normal functions...")
+try:
+    ray.get(
+        large_values.options(num_returns=num_returns, max_retries=0).remote(
+            num_returns
+        )[0]
+    )
+except ray.exceptions.WorkerCrashedError:
+    print("Worker failed with normal function")
+
+# Using a generator will allow the worker to finish.
+# Note that this will block until the full task is complete, i.e. the
+# last yield finishes.
+print("Using generators...")
+ray.get(
+    large_values_generator.options(num_returns=num_returns, max_retries=0).remote(
+        num_returns
+    )[0]
+)
+print("Success!")
diff --git a/doc/source/ray-core/patterns/generators.rst b/doc/source/ray-core/patterns/generators.rst
new file mode 100644
index 000000000000..c19c21d49448
--- /dev/null
+++ b/doc/source/ray-core/patterns/generators.rst
@@ -0,0 +1,48 @@
+.. _generators:
+
+Pattern: Using generators to reduce heap memory usage
+=====================================================
+
+In this pattern, we use **generators** in Python to reduce the total heap memory usage during a task. The key idea is that for tasks that return multiple objects, we can return them one at a time instead of all at once. This allows a worker to free the heap memory used by a previous return value before returning the next one.
+
+Example use case
+----------------
+
+You have a task that returns multiple large values. Another possibility is a task that returns a single large value, but you want to stream this value through Ray's object store by breaking it up into smaller chunks.
+
+Using normal Python functions, we can write such a task like this. Here's an example that returns numpy arrays of size 100MB each:
+
+.. literalinclude:: ../doc_code/pattern_generators.py
+    :language: python
+    :start-after: __large_values_start__
+    :end-before: __large_values_end__
+
+However, this will require the task to hold all ``num_returns`` arrays in heap memory at the same time at the end of the task. If there are many return values, this can lead to high heap memory usage and potentially an out-of-memory error.
+
+We can fix the above example by rewriting ``large_values`` as a **generator**. Instead of returning all values at once as a tuple or list, we can ``yield`` one value at a time.
+
+.. literalinclude:: ../doc_code/pattern_generators.py
+    :language: python
+    :start-after: __large_values_generator_start__
+    :end-before: __large_values_generator_end__
+
+Code example
+------------
+
+.. literalinclude:: ../doc_code/pattern_generators.py
+    :language: python
+    :start-after: __program_start__
+
+.. code-block:: text
+
+    $ RAY_IGNORE_UNHANDLED_ERRORS=1 python test.py 100
+
+    Using normal functions...
+    ... -- A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker...
+    Worker failed with normal function
+    Using generators...
+    (large_values_generator pid=373609) yielded return value 0
+    (large_values_generator pid=373609) yielded return value 1
+    (large_values_generator pid=373609) yielded return value 2
+    ...
+    Success!
diff --git a/doc/source/ray-core/patterns/index.rst b/doc/source/ray-core/patterns/index.rst
new file mode 100644
index 000000000000..ba1b8dca894e
--- /dev/null
+++ b/doc/source/ray-core/patterns/index.rst
@@ -0,0 +1,11 @@
+.. _core-patterns:
+
+Design Patterns & Anti-patterns
+===============================
+
+This section is a collection of common design patterns and anti-patterns for writing Ray applications.
+
+.. toctree::
+    :maxdepth: 1
+
+    generators
diff --git a/doc/source/ray-core/tasks/patterns/generators.rst b/doc/source/ray-core/tasks/patterns/generators.rst
deleted file mode 100644
index 1c6bfb5a8f5f..000000000000
--- a/doc/source/ray-core/tasks/patterns/generators.rst
+++ /dev/null
@@ -1,96 +0,0 @@
-.. _generators:
-
-Pattern: Using generators to reduce heap memory usage
-=====================================================
-
-In this pattern, we use **generators** in Python to reduce the total heap memory usage during a task. The key idea is that for tasks that return multiple objects, we can return them one at a time instead of all at once. This allows a worker to free the heap memory used by a previous return value before returning the next one.
-
-Example use case
-----------------
-
-You have a task that returns multiple large values. Another possibility is a task that returns a single large value, but you want to stream this value through Ray's object store by breaking it up into smaller chunks.
-
-Using normal Python functions, we can write such a task like this. Here's an example that returns numpy arrays of size 100MB each:
-
-.. code-block:: python
-
-   import numpy as np
-
-   @ray.remote
-   def large_values(num_returns):
-       return [
-           np.random.randint(
-               np.iinfo(np.int8).max, size=(100_000_000, 1), dtype=np.int8
-           )
-           for _ in range(num_returns)
-       ]
-
-However, this will require the task to hold all ``num_returns`` arrays in heap memory at the same time at the end of the task. If there are many return values, this can lead to high heap memory usage and potentially an out-of-memory error.
-
-We can fix the above example by rewriting ``large_values`` as a **generator**. Instead of returning all values at once as a tuple or list, we can ``yield`` one value at a time.
-
-.. code-block:: python
-
-   @ray.remote
-   def large_values_generator(num_returns):
-       for _ in range(num_returns):
-           yield np.random.randint(
-               np.iinfo(np.int8).max, size=(100_000_000, 1), dtype=np.int8
-           )
-
-Code example
-------------
-
-.. code-block:: python
-
-   import numpy as np
-   import ray
-
-   @ray.remote(max_retries=0)
-   def large_values(num_returns):
-       return [
-           np.random.randint(
-               np.iinfo(np.int8).max, size=(100_000_000, 1), dtype=np.int8
-           )
-           for _ in range(num_returns)
-       ]
-
-   @ray.remote(max_retries=0)
-   def large_values_generator(num_returns):
-       for i in range(num_returns):
-           yield np.random.randint(
-               np.iinfo(np.int8).max, size=(100_000_000, 1), dtype=np.int8
-           )
-           print(f"yielded return value {i}")
-
-   num_returns = 100
-   # Worker will likely OOM using normal returns.
-   print("Using normal functions...")
-   try:
-       ray.get(large_values.options(num_returns=num_returns).remote(num_returns)[0])
-   except ray.exceptions.WorkerCrashedError:
-       print("Worker failed with normal function")
-
-   # Using a generator will allow the worker to finish.
-   # Note that this will block until the full task is complete, i.e. the
-   # last yield finishes.
-   print("Using generators...")
-   ray.get(
-       large_values_generator.options(num_returns=num_returns).remote(num_returns)[0]
-   )
-   print("Success!")
-
-.. code-block:: text
-
-   $ RAY_IGNORE_UNHANDLED_ERRORS=1 python test.py
-
-   Using normal functions...
-   ... -- A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker...
-   Worker failed
-   Using generators...
-   (large_values_generator pid=373609) yielded return value 0
-   (large_values_generator pid=373609) yielded return value 1
-   (large_values_generator pid=373609) yielded return value 2
-   ...
-   Success!
-
diff --git a/doc/source/ray-core/tasks/patterns/index.rst b/doc/source/ray-core/tasks/patterns/index.rst
index 6abe8320930c..97fdefae2c1c 100644
--- a/doc/source/ray-core/tasks/patterns/index.rst
+++ b/doc/source/ray-core/tasks/patterns/index.rst
@@ -16,7 +16,6 @@ You may also be interested in visiting the design patterns section for :ref:`act
    tree-of-tasks
    map-reduce
    limit-tasks
-   generators
    closure-capture
    fine-grained-tasks
    global-variables
diff --git a/doc/source/ray-core/user-guide.rst b/doc/source/ray-core/user-guide.rst
index c31ca6fa6135..49c5de2a954f 100644
--- a/doc/source/ray-core/user-guide.rst
+++ b/doc/source/ray-core/user-guide.rst
@@ -13,4 +13,5 @@ If you’re brand new to Ray, we recommend starting with the :ref:`walkthrough <
     objects
     placement-group
     handling-dependencies
+    patterns/index.rst
     advanced-topics
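
The doc code above only ever fetches the first return value (``[0]``). A minimal consumer-side sketch that walks over every chunk is shown below; it is not part of the patch, the task name ``chunk_generator`` and the ~1 MB chunk size are made up for illustration, and it assumes the same ``num_returns`` generator behavior used in ``pattern_generators.py``, where ``.remote()`` returns one ``ObjectRef`` per yielded value:

    # Consumer-side sketch (illustrative): fetch and drop one chunk at a time.
    import numpy as np
    import ray

    ray.init(ignore_reinit_error=True)


    @ray.remote
    def chunk_generator(num_returns):
        # Yield one array at a time so the worker never holds all chunks in heap memory.
        for _ in range(num_returns):
            yield np.ones((1_000_000,), dtype=np.int8)  # ~1 MB per chunk for illustration


    num_returns = 5
    # With num_returns set, .remote() returns one ObjectRef per yielded value.
    refs = chunk_generator.options(num_returns=num_returns).remote(num_returns)

    # As generators.rst notes, ray.get blocks until the whole task finishes,
    # but the driver still only materializes one chunk in its heap at a time.
    for i, ref in enumerate(refs):
        chunk = ray.get(ref)
        print(f"chunk {i}: {chunk.nbytes} bytes")
        del chunk  # drop the local copy before fetching the next chunk

Dropping each ``chunk`` (or letting it go out of scope) before the next ``ray.get`` is what keeps driver-side heap usage flat; the chunks themselves remain in Ray's object store until their references are released.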