ray-project · jjyao · Oct 18, 2022 · Oct 13, 2022 · Oct 13, 2022 · Oct 13, 2022
diff --git a/doc/source/ray-core/doc_code/pattern_tree_of_tasks.py b/doc/source/ray-core/doc_code/pattern_tree_of_tasks.py
@@ -0,0 +1,75 @@
+# __pattern_start__
+import ray
+import time
+from numpy import random
+
+
+ray.init()
+
+
+def partition(collection):
+    # Use the last element as the pivot
+    pivot = collection.pop()
+    greater, lesser = [], []
+    for element in collection:
+        if element > pivot:
+            greater.append(element)
+        else:
+            lesser.append(element)
+    return lesser, pivot, greater
+
+
+def quick_sort(collection):
+    if len(collection) <= 200000:  # magic number
+        return sorted(collection)
+    else:
+        lesser, pivot, greater = partition(collection)
+        lesser = quick_sort(lesser)
+        greater = quick_sort(greater)
+    return lesser + [pivot] + greater
+
+
+@ray.remote
+def quick_sort_distributed(collection):
+    # Tiny tasks are an antipattern.
+    # Thus, in our example we have a "magic number" to
+    # toggle when distributed recursion should be used vs
+    # when the sorting should be done in place. The rule
+    # of thumb is that the duration of an individual task
+    # should be at least 1 second.
+    if len(collection) <= 200000:  # magic number
+        return sorted(collection)
+    else:
+        lesser, pivot, greater = partition(collection)
+        lesser = quick_sort_distributed.remote(lesser)
+        greater = quick_sort_distributed.remote(greater)
+        return ray.get(lesser) + [pivot] + ray.get(greater)
+
+
+for size in [200000, 4000000, 8000000]:
+    print(f"Array size: {size}")
+    unsorted = random.randint(1000000, size=(size)).tolist()
+    s = time.time()
+    quick_sort(unsorted)
+    print(f"Sequential execution: {(time.time() - s):.3f}")
+    s = time.time()
+    ray.get(quick_sort_distributed.remote(unsorted))
+    print(f"Distributed execution: {(time.time() - s):.3f}")
+    print("--" * 10)
+
+# Outputs:
+
+# Array size: 200000
+# Sequential execution: 0.040
+# Distributed execution: 0.152
+# --------------------
+# Array size: 4000000
+# Sequential execution: 6.161
+# Distributed execution: 5.779
+# --------------------
+# Array size: 8000000
+# Sequential execution: 15.459
+# Distributed execution: 11.282
+# --------------------
+
+# __pattern_end__
@@ -8,6 +8,7 @@ This section is a collection of common design patterns and anti-patterns for wri
 .. toctree::
     :maxdepth: 1
 
+    tree-of-tasks
     generators
     limit-pending-tasks
     limit-running-tasks

diff --git a/doc/source/ray-core/patterns/tree-of-tasks.rst b/doc/source/ray-core/patterns/tree-of-tasks.rst
@@ -0,0 +1,32 @@
+.. _task-pattern-tree-of-tasks:
+
+Pattern: Using a tree of tasks to achieve nested parallelism
+============================================================
+
+In this pattern, a remote function can dynamically call other remote functions (including itself) for parallelism.
+
+
+Example use case
+----------------
+
+You want to quick-sort a large list of numbers.
+By making the recursive calls distributed, we can sort the list distributedly and parallelly.
+
+.. figure:: ../images/tree-of-tasks.svg
+
+    Tree of tasks
+
+
+Code example
+------------
+
+.. literalinclude:: ../doc_code/pattern_tree_of_tasks.py
+    :language: python
+    :start-after: __pattern_start__
+    :end-before: __pattern_end__
+
+We call :ref:`ray.get() <ray-get-ref>` after both ``quick_sort_distributed`` function invocations take place.
+This allows you to maximize parallelism in the workload. See :doc:`ray-get-loop` for more details.
+
+Notice in the execution times above that with smaller and finer tasks, the non-distributed version is faster; however, as the task execution
+time increases, that is the task with larger list takes longer, the distributed version is faster. See :doc:`too-fine-grained-tasks` for more details.
@@ -13,5 +13,4 @@ You may also be interested in visiting the design patterns section for :ref:`act
 .. toctree::
     :maxdepth: -1
 
-    tree-of-tasks
     map-reduce