forked from ray-project/ray
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[air/tuner] Expose number of errored/terminated trials in ResultGrid (ray-project#26655)
This introduces an easy interface to retrieve the number of errored and terminated (non-errored) trials from the result grid. Previously `tune.run(raise_on_failed_trial)` could be used to raise a TuneError if at least one trial failed. We've removed this option to make sure we always get a return value. `ResultGrid.num_errored` will make it easy for users to identify if trials failed and react to it instead of the old try-catch loop. Signed-off-by: Kai Fricke <[email protected]> Signed-off-by: Xiaowei Jiang <[email protected]>
- Loading branch information
1 parent
14b2291
commit 3e14b45
Showing
6 changed files
with
103 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import os | ||
import tempfile | ||
|
||
from ray.tune import Callback | ||
from ray.tune.execution.trial_runner import TrialRunner | ||
|
||
|
||
class TrialResultObserver(Callback):
    """Helper callback used to control how often ``runner.step()`` is called.

    Keeps a running count of trial results and remembers the count that was
    last acknowledged, so a caller can ask whether anything new has arrived
    since the previous check.
    """

    def __init__(self):
        # Count acknowledged by the last reset()/just_received_a_result().
        self._last_counter = 0
        # Total number of on_trial_result() invocations so far.
        self._counter = 0

    def reset(self):
        """Acknowledge every result counted so far."""
        self._last_counter = self._counter

    def just_received_a_result(self):
        """Report whether a result arrived since the last acknowledgement.

        Returns:
            ``True`` if the result count moved past the acknowledged count
            (which is then advanced to match), ``False`` otherwise.
        """
        has_new_result = self._counter != self._last_counter
        if has_new_result:
            self._last_counter = self._counter
        return has_new_result

    def on_trial_result(self, **kwargs):
        """Count one more trial result; the keyword arguments are ignored."""
        self._counter += 1
|
||
|
||
def create_tune_experiment_checkpoint(trials: list, **runner_kwargs) -> str:
    """Write an experiment checkpoint for ``trials`` into a new temp directory.

    A ``TrialRunner`` is built from ``runner_kwargs``, every trial is added to
    it, and a checkpoint is forced. The process environment is restored to its
    prior contents afterwards, whether or not the runner raised.

    Args:
        trials: Trials to register with the runner before checkpointing.
        **runner_kwargs: Forwarded to ``TrialRunner``. ``local_checkpoint_dir``
            defaults to the freshly created temporary directory when absent.

    Returns:
        Path of the temporary directory created for the experiment.
    """
    checkpoint_root = tempfile.mkdtemp()
    if "local_checkpoint_dir" not in runner_kwargs:
        runner_kwargs["local_checkpoint_dir"] = checkpoint_root

    # Snapshot the environment so it can be put back exactly as it was.
    env_snapshot = os.environ.copy()

    # Set to 1 to disable ray cluster resource lookup. That way we can
    # create experiment checkpoints without initializing ray.
    os.environ["TUNE_MAX_PENDING_TRIALS_PG"] = "1"

    try:
        trial_runner = TrialRunner(**runner_kwargs)
        for pending_trial in trials:
            trial_runner.add_trial(pending_trial)
        trial_runner.checkpoint(force=True)
    finally:
        # Wipe and repopulate so the environment matches the snapshot.
        os.environ.clear()
        os.environ.update(env_snapshot)

    return checkpoint_root
This file was deleted.
Oops, something went wrong.