-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[tune] Better error message for Tune nested tasks / actors (#25241)
This PR uses a task/actor launch hook to generate better error messages for nested Tune tasks/actors when no extra resources are reserved for them. The Tune trial runner actor sets the hook before executing the user code. If the user code then launches a task or actor that the trial's placement group cannot possibly fit, a TuneError is raised immediately to warn the user.
- Loading branch information
Showing
8 changed files
with
275 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import pytest | ||
|
||
import ray | ||
from ray import tune | ||
from ray.data.context import DatasetContext | ||
from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy | ||
from ray.tune.error import TuneError | ||
|
||
|
||
def test_nowarn_zero_cpu():
    """Nested tasks and actors requesting zero CPUs must not raise.

    The trainable launches one remote task and one actor, both declared
    with ``num_cpus=0``, and ``tune.run`` is expected to complete without
    an exception.
    """

    def f(*a):
        @ray.remote(num_cpus=0)
        def zero_cpu_task():
            pass

        @ray.remote(num_cpus=0)
        class ZeroCpuActor:
            def f(self):
                pass

        # Nested task first, then the nested actor and one method call on it.
        task_ref = zero_cpu_task.remote()
        ray.get(task_ref)
        handle = ZeroCpuActor.remote()
        method_ref = handle.f.remote()
        ray.get(method_ref)

    tune.run(f, verbose=0)
|
||
|
||
def test_warn_cpu():
    """Nested 1-CPU tasks and actors must make ``tune.run`` raise ``TuneError``.

    Each trainable is run twice: once without ``resources_per_trial`` and
    once with a placement group factory holding a single ``{"CPU": 1}``
    bundle. Every run is expected to raise.
    """

    def f(*a):
        @ray.remote(num_cpus=1)
        def one_cpu_task():
            pass

        ray.get(one_cpu_task.remote())

    def g(*a):
        @ray.remote(num_cpus=1)
        class OneCpuActor:
            def f(self):
                pass

        handle = OneCpuActor.remote()
        ray.get(handle.f.remote())

    # Task-launching trainable first, then the actor-launching one.
    for trainable in (f, g):
        with pytest.raises(TuneError):
            tune.run(trainable, verbose=0)

        with pytest.raises(TuneError):
            tune.run(
                trainable,
                resources_per_trial=tune.PlacementGroupFactory([{"CPU": 1}]),
                verbose=0,
            )
|
||
|
||
def test_pg_slots_ok():
    """Nested 1-CPU work is accepted when the trial has two 1-CPU bundles.

    The trainable launches a 1-CPU task and a 1-CPU actor; with a placement
    group factory of two ``{"CPU": 1}`` bundles, ``tune.run`` is expected to
    complete without raising.
    """

    def f(*a):
        @ray.remote(num_cpus=1)
        def one_cpu_task():
            pass

        @ray.remote(num_cpus=1)
        class OneCpuActor:
            def f(self):
                pass

        task_ref = one_cpu_task.remote()
        ray.get(task_ref)
        handle = OneCpuActor.remote()
        ray.get(handle.f.remote())

    two_bundles = tune.PlacementGroupFactory([{"CPU": 1}] * 2)
    tune.run(f, resources_per_trial=two_bundles, verbose=0)
|
||
|
||
def test_bad_pg_slots():
    """A nested 2-CPU task must raise when the trial has only 1-CPU bundles.

    The placement group factory provides two ``{"CPU": 1}`` bundles while the
    nested task requests ``num_cpus=2``; ``tune.run`` is expected to raise
    ``TuneError``.
    """

    def f(*a):
        @ray.remote(num_cpus=2)
        def two_cpu_task():
            pass

        pending = two_cpu_task.remote()
        ray.get(pending)

    with pytest.raises(TuneError):
        tune.run(
            f,
            verbose=0,
            resources_per_trial=tune.PlacementGroupFactory([{"CPU": 1}] * 2),
        )
|
||
|
||
def test_dataset_ok():
    """Check Ray Datasets usage inside a trainable, with and without a PG strategy.

    * ``f`` uses the dataset context as-is and is expected to run cleanly.
    * ``g`` points the dataset context's scheduling strategy at the current
      placement group; it is expected to raise ``TuneError`` when no
      ``resources_per_trial`` is given, and to run cleanly with two
      ``{"CPU": 1}`` bundles.
    """

    def f(*a):
        dataset = ray.data.range(10)
        dataset.show()

    tune.run(f, verbose=0)

    def g(*a):
        # Route dataset tasks into the trial's own placement group.
        current_pg = ray.util.get_current_placement_group()
        ctx = DatasetContext.get_current()
        ctx.scheduling_strategy = PlacementGroupSchedulingStrategy(current_pg)
        ray.data.range(10).show()

    with pytest.raises(TuneError):
        tune.run(g, verbose=0)

    two_bundles = tune.PlacementGroupFactory([{"CPU": 1}] * 2)
    tune.run(g, resources_per_trial=two_bundles, verbose=0)
|
||
|
||
def test_scheduling_strategy_override():
    """Nested 1-CPU work with ``scheduling_strategy="SPREAD"`` must not raise.

    The trainable launches a task and an actor that both override the
    scheduling strategy; ``tune.run`` is expected to complete without an
    exception even though no ``resources_per_trial`` is given.
    """

    def f(*a):
        @ray.remote(num_cpus=1, scheduling_strategy="SPREAD")
        def spread_task():
            pass

        @ray.remote(num_cpus=1, scheduling_strategy="SPREAD")
        class SpreadActor:
            def f(self):
                pass

        # SPREAD tasks are not captured by placement groups, so no warning
        # is expected here.
        task_ref = spread_task.remote()
        ray.get(task_ref)

        # SPREAD actors are not captured by placement groups either.
        handle = SpreadActor.remote()
        ray.get(handle.f.remote())

    tune.run(f, verbose=0)
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    import sys

    # Run pytest verbosely on this file and propagate its exit status.
    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters