Skip to content

Commit

Permalink
[Air][Data] Don't promote locality_hints for split (ray-project#26647)
Browse files Browse the repository at this point in the history
Why are these changes needed?
Since locality_hints is an experimental feature, we stop promoting it in the docs and don't enable it in AIR. See ray-project#26641 for more context.

Signed-off-by: Stefan van der Kleij <[email protected]>
  • Loading branch information
scv119 authored and Stefan van der Kleij committed Aug 18, 2022
1 parent ea9790a commit 82affc4
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 8 deletions.
2 changes: 1 addition & 1 deletion doc/source/data/doc_code/accessing_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def train(self, shard: ray.data.Dataset[int]) -> int:
ds = ray.data.range(10000)
# -> Dataset(num_blocks=200, num_rows=10000, schema=<class 'int'>)

shards = ds.split(n=4, locality_hints=workers)
shards = ds.split(n=4)
# -> [Dataset(num_blocks=13, num_rows=2500, schema=<class 'int'>),
# Dataset(num_blocks=13, num_rows=2500, schema=<class 'int'>), ...]

Expand Down
2 changes: 1 addition & 1 deletion doc/source/data/doc_code/quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def train(self, shard) -> int:
workers = [Worker.remote(i) for i in range(4)]
# -> [Actor(Worker, ...), Actor(Worker, ...), ...]

shards = ds.split(n=4, locality_hints=workers)
shards = ds.split(n=4)
# -> [
# Dataset(num_blocks=3, num_rows=45,
# schema={sepal.length: double, sepal.width: double,
Expand Down
4 changes: 2 additions & 2 deletions doc/source/data/examples/nyc_taxi_basic_processing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@
"id": "8b10fc64",
"metadata": {},
"source": [
"Next, we split the dataset into ``len(trainers)`` shards, ensuring that the shards are of equal size, and providing the trainer actor handles to Ray Datasets as locality hints, so Datasets can try to colocate shard data with trainers in order to decrease data movement."
"Next, we split the dataset into ``len(trainers)`` shards, ensuring that the shards are of equal size."
]
},
{
Expand All @@ -884,7 +884,7 @@
}
],
"source": [
"shards = ds.split(n=len(trainers), equal=True, locality_hints=trainers)\n",
"shards = ds.split(n=len(trainers), equal=True)\n",
"shards"
]
},
Expand Down
5 changes: 1 addition & 4 deletions python/ray/train/_internal/dataset_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ class RayDatasetSpec:
training workers (to use as locality hints). The Callable is expected to
return a list of RayDatasets or a list of dictionaries of RayDatasets,
with the length of the list equal to the length of the list of actor handles.
If None is provided, the provided Ray Dataset(s) will simply be split using
the actor handles as locality hints.
If None is provided, the provided Ray Dataset(s) will be equally split.
"""

Expand All @@ -48,7 +47,6 @@ def split_dataset(dataset_or_pipeline):
return dataset_or_pipeline.split(
len(training_worker_handles),
equal=True,
locality_hints=training_worker_handles,
)

if isinstance(self.dataset_or_dict, dict):
Expand Down Expand Up @@ -209,7 +207,6 @@ def get_dataset_shards(
dataset_splits = dataset.split(
len(training_worker_handles),
equal=True,
locality_hints=training_worker_handles,
)
else:
dataset_splits = [dataset] * len(training_worker_handles)
Expand Down

0 comments on commit 82affc4

Please sign in to comment.