Skip to content

Commit

Permalink
[Air][Data] Don't promote locality_hints for split (ray-project#26647)
Browse files Browse the repository at this point in the history
Why are these changes needed?
Since locality_hints is an experimental feature, we stop promoting it in the docs and don't enable it in AIR. See ray-project#26641 for more context.

Signed-off-by: Stefan van der Kleij <[email protected]>
  • Loading branch information
scv119 authored and Stefan van der Kleij committed Aug 18, 2022
1 parent ea9790a commit 82affc4
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 8 deletions.
2 changes: 1 addition & 1 deletion doc/source/data/doc_code/accessing_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def train(self, shard: ray.data.Dataset[int]) -> int:
ds = ray.data.range(10000)
# -> Dataset(num_blocks=200, num_rows=10000, schema=<class 'int'>)

shards = ds.split(n=4, locality_hints=workers)
shards = ds.split(n=4)
# -> [Dataset(num_blocks=13, num_rows=2500, schema=<class 'int'>),
# Dataset(num_blocks=13, num_rows=2500, schema=<class 'int'>), ...]

Expand Down
2 changes: 1 addition & 1 deletion doc/source/data/doc_code/quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def train(self, shard) -> int:
workers = [Worker.remote(i) for i in range(4)]
# -> [Actor(Worker, ...), Actor(Worker, ...), ...]

shards = ds.split(n=4, locality_hints=workers)
shards = ds.split(n=4)
# -> [
# Dataset(num_blocks=3, num_rows=45,
# schema={sepal.length: double, sepal.width: double,
Expand Down
4 changes: 2 additions & 2 deletions doc/source/data/examples/nyc_taxi_basic_processing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@
"id": "8b10fc64",
"metadata": {},
"source": [
"Next, we split the dataset into ``len(trainers)`` shards, ensuring that the shards are of equal size, and providing the trainer actor handles to Ray Datasets as locality hints, so Datasets can try to colocate shard data with trainers in order to decrease data movement."
"Next, we split the dataset into ``len(trainers)`` shards, ensuring that the shards are of equal size."
]
},
{
Expand All @@ -884,7 +884,7 @@
}
],
"source": [
"shards = ds.split(n=len(trainers), equal=True, locality_hints=trainers)\n",
"shards = ds.split(n=len(trainers), equal=True)\n",
"shards"
]
},
Expand Down
5 changes: 1 addition & 4 deletions python/ray/train/_internal/dataset_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ class RayDatasetSpec:
training workers (to use as locality hints). The Callable is expected to
return a list of RayDatasets or a list of dictionaries of RayDatasets,
with the length of the list equal to the length of the list of actor handles.
If None is provided, the provided Ray Dataset(s) will simply be split using
the actor handles as locality hints.
If None is provided, the provided Ray Dataset(s) will be equally split.
"""

Expand All @@ -48,7 +47,6 @@ def split_dataset(dataset_or_pipeline):
return dataset_or_pipeline.split(
len(training_worker_handles),
equal=True,
locality_hints=training_worker_handles,
)

if isinstance(self.dataset_or_dict, dict):
Expand Down Expand Up @@ -209,7 +207,6 @@ def get_dataset_shards(
dataset_splits = dataset.split(
len(training_worker_handles),
equal=True,
locality_hints=training_worker_handles,
)
else:
dataset_splits = [dataset] * len(training_worker_handles)
Expand Down

0 comments on commit 82affc4

Please sign in to comment.