Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[tune] Add Tuner.get_results() to retrieve results after restore #29083

Merged
merged 4 commits into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion python/ray/tune/impl/tuner_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
# Filename used to persist the Tuner state, and attribute-name keys that are
# stripped from the pickled state in `__getstate__` (these attributes are
# re-created on restore rather than serialized).
_TUNER_PKL = "tuner.pkl"
_TRAINABLE_KEY = "_trainable"
_PARAM_SPACE_KEY = "_param_space"
_EXPERIMENT_ANALYSIS_KEY = "_experiment_analysis"


class TunerInternal:
Expand Down Expand Up @@ -104,6 +105,8 @@ def __init__(
self._run_config
)

self._experiment_analysis = None

# Not used for restored Tuner.
self._param_space = param_space or {}
self._process_scaling_config()
Expand Down Expand Up @@ -219,6 +222,13 @@ def _restore_from_path_or_uri(
shutil.rmtree(experiment_checkpoint_path)
self._experiment_checkpoint_dir = str(new_exp_path)

try:
self._experiment_analysis = ExperimentAnalysis(
self._experiment_checkpoint_dir
)
except Exception:
self._experiment_analysis = None

def _maybe_sync_down_tuner_state(self, restore_path: str) -> Tuple[bool, str]:
"""Sync down trainable state from remote storage.

Expand Down Expand Up @@ -284,7 +294,17 @@ def fit(self) -> ResultGrid:
else:
analysis = self._fit_resume(trainable)

return ResultGrid(analysis)
self._experiment_analysis = analysis

return ResultGrid(self._experiment_analysis)

def get_results(self) -> ResultGrid:
    """Return the results of the last (possibly restored) run.

    Wraps the stored ``ExperimentAnalysis`` in a :class:`ResultGrid`.
    The analysis object is populated either by ``fit()`` or when the
    Tuner is restored from an experiment checkpoint.

    Raises:
        RuntimeError: If no experiment analysis is available yet
            (i.e. the experiment has never been run).
    """
    analysis = self._experiment_analysis
    if analysis:
        return ResultGrid(analysis)
    raise RuntimeError(
        "Can't return results as experiment has not been run, yet. "
        "Call `Tuner.fit()` to run the experiment first."
    )

def _get_tune_run_arguments(self, trainable) -> Dict[str, Any]:
"""Get tune.run arguments common for both new and resumed runs."""
Expand Down Expand Up @@ -413,6 +433,7 @@ def __getstate__(self):
state = self.__dict__.copy()
state.pop(_TRAINABLE_KEY, None)
state.pop(_PARAM_SPACE_KEY, None)
state.pop(_EXPERIMENT_ANALYSIS_KEY, None)
return state

def __setstate__(self, state):
Expand Down
23 changes: 23 additions & 0 deletions python/ray/tune/tests/test_tuner_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ def test_tuner_restore_num_trials(ray_start_4_cpus, tmpdir):

del tuner
tuner = Tuner.restore(str(tmpdir / "test_tuner_restore_num_trials"))

# Check restored results
results = tuner.get_results()
assert len(results) == 4

results = tuner.fit()
assert len(results) == 4

Expand Down Expand Up @@ -137,7 +142,17 @@ def test_tuner_restore_resume_errored(ray_start_4_cpus, tmpdir):
tuner = Tuner.restore(
str(tmpdir / "test_tuner_restore_resume_errored"), resume_errored=True
)

# Check restored results
results = tuner.get_results()
assert len(results) == 4
assert len(results.errors) == 2
# Second and third trial are at iter 1 because they failed after first report
assert [r.metrics["it"] for r in results] == [2, 1, 2, 1]

# Get new results
results = tuner.fit()

assert len(results) == 4
assert len(results.errors) == 0
# Since the errored trials are being resumed from previous state and then report
Expand Down Expand Up @@ -176,6 +191,14 @@ def test_tuner_restore_restart_errored(ray_start_4_cpus, tmpdir):
tuner = Tuner.restore(
str(tmpdir / "test_tuner_restore_restart_errored"), restart_errored=True
)

# Check restored results
results = tuner.get_results()
krfricke marked this conversation as resolved.
Show resolved Hide resolved
assert len(results) == 4
assert len(results.errors) == 2
assert [r.metrics["it"] for r in results] == [2, 1, 2, 1]

# Get new results
results = tuner.fit()
assert len(results) == 4
assert len(results.errors) == 0
Expand Down
25 changes: 25 additions & 0 deletions python/ray/tune/tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,28 @@ def fit(self) -> ResultGrid:
raise TuneError(
_TUNER_FAILED_MSG.format(path=experiment_checkpoint_dir)
) from e

def get_results(self) -> ResultGrid:
    """Get results of a hyperparameter tuning run.

    This method returns the same results as :meth:`fit() <ray.tune.tuner.Tuner.fit>`
    and can be used to retrieve the results after restoring a tuner without
    calling ``fit()`` again.

    If the tuner has not been fit before, an error will be raised.

    .. code-block:: python

        from ray.tune import Tuner

        tuner = Tuner.restore("/path/to/experiment")
        results = tuner.get_results()

    Returns:
        Result grid of a previously fitted tuning run.

    """
    if not self._is_ray_client:
        return self._local_tuner.get_results()
    else:
        # Fetch the existing results from the remote tuner. Calling
        # `fit.remote()` here (as before) would re-run the entire
        # experiment instead of returning the already-computed results.
        return ray.get(self._remote_tuner.get_results.remote())