Skip to content

Commit

Permalink
schedulers: reward_attr -> metric, mode (ray-project#4120)
Browse files Browse the repository at this point in the history
  • Loading branch information
hershg committed May 25, 2019
1 parent 2015085 commit 8f69bb1
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 26 deletions.
19 changes: 14 additions & 5 deletions python/ray/tune/schedulers/async_hyperband.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ class AsyncHyperBandScheduler(FIFOScheduler):
Note that you can pass in something non-temporal such as
`training_iteration` as a measure of progress, the only requirement
is that the attribute should increase monotonically.
reward_attr (str): The training result objective value attribute. As
metric (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value. Stopping
procedures will use this attribute.
mode (str): One of {min, max}. Determines whether objective is minimizing
or maximizing the metric attribute.
max_t (float): max time units per trial. Trials will be stopped after
max_t time units (determined by time_attr) have passed.
grace_period (float): Only stop trials at least this old in time.
Expand All @@ -40,7 +42,8 @@ class AsyncHyperBandScheduler(FIFOScheduler):

def __init__(self,
time_attr="training_iteration",
reward_attr="episode_reward_mean",
metric="episode_reward_mean",
mode="max",
max_t=100,
grace_period=10,
reduction_factor=4,
Expand All @@ -50,6 +53,7 @@ def __init__(self,
assert grace_period > 0, "grace_period must be positive!"
assert reduction_factor > 1, "Reduction Factor not valid!"
assert brackets > 0, "brackets must be positive!"
assert mode in ["min", "max"], "mode must be 'min' or 'max'!"
FIFOScheduler.__init__(self)
self._reduction_factor = reduction_factor
self._max_t = max_t
Expand All @@ -63,9 +67,14 @@ def __init__(self,
]
self._counter = 0 # for
self._num_stopped = 0
self._reward_attr = reward_attr
self._metric = metric
self._time_attr = time_attr

if mode == "max":
self._metric_op = 1.
elif mode == "min":
self._metric_op = -1.

def on_trial_add(self, trial_runner, trial):
sizes = np.array([len(b._rungs) for b in self._brackets])
probs = np.e**(sizes - sizes.max())
Expand All @@ -80,15 +89,15 @@ def on_trial_result(self, trial_runner, trial, result):
else:
bracket = self._trial_info[trial.trial_id]
action = bracket.on_result(trial, result[self._time_attr],
result[self._reward_attr])
self._metric_op * result[self._metric])
if action == TrialScheduler.STOP:
self._num_stopped += 1
return action

def on_trial_complete(self, trial_runner, trial, result):
bracket = self._trial_info[trial.trial_id]
bracket.on_result(trial, result[self._time_attr],
result[self._reward_attr])
self._metric_op * result[self._metric])
del self._trial_info[trial.trial_id]

def on_trial_remove(self, trial_runner, trial):
Expand Down
26 changes: 18 additions & 8 deletions python/ray/tune/schedulers/hyperband.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ class HyperBandScheduler(FIFOScheduler):
To use this implementation of HyperBand with Tune, all you need
to do is specify the max length of time a trial can run `max_t`, the time
units `time_attr`, and the name of the reported objective value
`reward_attr`. We automatically determine reasonable values for the other
units `time_attr`, the name of the reported objective value `metric`,
and whether `metric` is to be maximized or minimized (`mode`).
We automatically determine reasonable values for the other
HyperBand parameters based on the given values.
For example, to limit trials to 10 minutes and early stop based on the
Expand All @@ -62,9 +63,11 @@ class HyperBandScheduler(FIFOScheduler):
Note that you can pass in something non-temporal such as
`training_iteration` as a measure of progress, the only requirement
is that the attribute should increase monotonically.
reward_attr (str): The training result objective value attribute. As
metric (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value. Stopping
procedures will use this attribute.
mode (str): One of {min, max}. Determines whether objective is minimizing
or maximizing the metric attribute.
max_t (int): max time units per trial. Trials will be stopped after
max_t time units (determined by time_attr) have passed.
The scheduler will terminate trials after this time has passed.
Expand All @@ -74,9 +77,11 @@ class HyperBandScheduler(FIFOScheduler):

def __init__(self,
time_attr="training_iteration",
reward_attr="episode_reward_mean",
metric="episode_reward_mean",
mode="max",
max_t=81):
assert max_t > 0, "Max (time_attr) not valid!"
assert mode in ["min", "max"], "mode must be 'min' or 'max'!"
FIFOScheduler.__init__(self)
self._eta = 3
self._s_max_1 = 5
Expand All @@ -92,9 +97,14 @@ def __init__(self,
# Tracks state for new trial add
self._state = {"bracket": None, "band_idx": 0}
self._num_stopped = 0
self._reward_attr = reward_attr
self._metric = metric
self._time_attr = time_attr

if mode == "max":
self._metric_op = 1.
elif mode == "min":
self._metric_op = -1.

def on_trial_add(self, trial_runner, trial):
"""Adds new trial.
Expand Down Expand Up @@ -173,7 +183,7 @@ def _process_bracket(self, trial_runner, bracket, trial):
bracket.cleanup_full(trial_runner)
return TrialScheduler.STOP

good, bad = bracket.successive_halving(self._reward_attr)
good, bad = bracket.successive_halving(self._metric, self._metric_op)
# kill bad trials
self._num_stopped += len(bad)
for t in bad:
Expand Down Expand Up @@ -322,7 +332,7 @@ def filled(self):

return len(self._live_trials) == self._n

def successive_halving(self, reward_attr):
def successive_halving(self, metric, metric_op):
assert self._halves > 0
self._halves -= 1
self._n /= self._eta
Expand All @@ -332,7 +342,7 @@ def successive_halving(self, reward_attr):
self._r = int(min(self._r, self._max_t_attr - self._cumul_r))
self._cumul_r += self._r
sorted_trials = sorted(
self._live_trials, key=lambda t: self._live_trials[t][reward_attr])
self._live_trials, key=lambda t: metric_op * self._live_trials[t][metric])

good, bad = sorted_trials[-self._n:], sorted_trials[:-self._n]
return good, bad
Expand Down
24 changes: 16 additions & 8 deletions python/ray/tune/schedulers/median_stopping_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ class MedianStoppingRule(FIFOScheduler):
Note that you can pass in something non-temporal such as
`training_iteration` as a measure of progress, the only requirement
is that the attribute should increase monotonically.
reward_attr (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value that
is supposed to increase with time.
metric (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value. Stopping
procedures will use this attribute.
mode (str): One of {min, max}. Determines whether objective is minimizing
or maximizing the metric attribute.
grace_period (float): Only stop trials at least this old in time.
The units are the same as the attribute named by `time_attr`.
min_samples_required (int): Min samples to compute median over.
Expand All @@ -37,7 +39,8 @@ class MedianStoppingRule(FIFOScheduler):

def __init__(self,
time_attr="time_total_s",
reward_attr="episode_reward_mean",
metric="episode_reward_mean",
mode="max",
grace_period=60.0,
min_samples_required=3,
hard_stop=True,
Expand All @@ -48,11 +51,16 @@ def __init__(self,
self._results = collections.defaultdict(list)
self._grace_period = grace_period
self._min_samples_required = min_samples_required
self._reward_attr = reward_attr
self._metric = metric
self._time_attr = time_attr
self._hard_stop = hard_stop
self._verbose = verbose

if mode == "max":
self._metric_op = 1.
elif mode == "min":
self._metric_op = -1.

def on_trial_result(self, trial_runner, trial, result):
"""Callback for early stopping.
Expand Down Expand Up @@ -110,11 +118,11 @@ def _running_result(self, trial, t_max=float("inf")):
results = self._results[trial]
# TODO(ekl) we could do interpolation to be more precise, but for now
# assume len(results) is large and the time diffs are roughly equal
return np.mean([
r[self._reward_attr] for r in results
return self._metric_op * np.mean([
r[self._metric] for r in results
if r[self._time_attr] <= t_max
])

def _best_result(self, trial):
results = self._results[trial]
return max(r[self._reward_attr] for r in results)
return max(self._metric_op * r[self._metric] for r in results)
19 changes: 14 additions & 5 deletions python/ray/tune/schedulers/pbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,11 @@ class PopulationBasedTraining(FIFOScheduler):
Note that you can pass in something non-temporal such as
`training_iteration` as a measure of progress, the only requirement
is that the attribute should increase monotonically.
reward_attr (str): The training result objective value attribute. As
metric (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value. Stopping
procedures will use this attribute.
mode (str): One of {min, max}. Determines whether objective is minimizing
or maximizing the metric attribute.
perturbation_interval (float): Models will be considered for
perturbation at this interval of `time_attr`. Note that
perturbation incurs checkpoint overhead, so you shouldn't set this
Expand All @@ -149,7 +151,8 @@ class PopulationBasedTraining(FIFOScheduler):
Example:
>>> pbt = PopulationBasedTraining(
>>> time_attr="training_iteration",
>>> reward_attr="episode_reward_mean",
>>> metric="episode_reward_mean",
>>> mode="max",
>>> perturbation_interval=10, # every 10 `time_attr` units
>>> # (training_iterations in this case)
>>> hyperparam_mutations={
Expand All @@ -165,7 +168,8 @@ class PopulationBasedTraining(FIFOScheduler):

def __init__(self,
time_attr="time_total_s",
reward_attr="episode_reward_mean",
metric="episode_reward_mean",
mode="max",
perturbation_interval=60.0,
hyperparam_mutations={},
resample_probability=0.25,
Expand All @@ -176,7 +180,7 @@ def __init__(self,
"You must specify at least one of `hyperparam_mutations` or "
"`custom_explore_fn` to use PBT.")
FIFOScheduler.__init__(self)
self._reward_attr = reward_attr
self._metric = metric
self._time_attr = time_attr
self._perturbation_interval = perturbation_interval
self._hyperparam_mutations = hyperparam_mutations
Expand All @@ -185,6 +189,11 @@ def __init__(self,
self._custom_explore_fn = custom_explore_fn
self._log_config = log_config

if mode == "max":
self._metric_op = 1.
elif mode == "min":
self._metric_op = -1.

# Metrics
self._num_checkpoints = 0
self._num_perturbations = 0
Expand All @@ -199,7 +208,7 @@ def on_trial_result(self, trial_runner, trial, result):
if time - state.last_perturbation_time < self._perturbation_interval:
return TrialScheduler.CONTINUE # avoid checkpoint overhead

score = result[self._reward_attr]
score = self.metric_op * result[self._metric]
state.last_score = score
state.last_perturbation_time = time
lower_quantile, upper_quantile = self._quantiles()
Expand Down

0 comments on commit 8f69bb1

Please sign in to comment.