Skip to content

Commit

Permalink
schedulers: reward_attr -> metric, mode (ray-project#4120)
Browse files Browse the repository at this point in the history
  • Loading branch information
hershg committed May 27, 2019
1 parent 7a78e1e commit 4316f60
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 26 deletions.
19 changes: 14 additions & 5 deletions python/ray/tune/schedulers/async_hyperband.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ class AsyncHyperBandScheduler(FIFOScheduler):
Note that you can pass in something non-temporal such as
`training_iteration` as a measure of progress, the only requirement
is that the attribute should increase monotonically.
reward_attr (str): The training result objective value attribute. As
metric (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value. Stopping
procedures will use this attribute.
mode (str): One of {min, max}. Determines whether objective is
minimizing or maximizing the metric attribute.
max_t (float): max time units per trial. Trials will be stopped after
max_t time units (determined by time_attr) have passed.
grace_period (float): Only stop trials at least this old in time.
Expand All @@ -40,7 +42,8 @@ class AsyncHyperBandScheduler(FIFOScheduler):

def __init__(self,
time_attr="training_iteration",
reward_attr="episode_reward_mean",
metric="episode_reward_mean",
mode="max",
max_t=100,
grace_period=10,
reduction_factor=4,
Expand All @@ -50,6 +53,7 @@ def __init__(self,
assert grace_period > 0, "grace_period must be positive!"
assert reduction_factor > 1, "Reduction Factor not valid!"
assert brackets > 0, "brackets must be positive!"
assert mode in ["min", "max"], "mode must be 'min' or 'max'!"
FIFOScheduler.__init__(self)
self._reduction_factor = reduction_factor
self._max_t = max_t
Expand All @@ -63,9 +67,14 @@ def __init__(self,
]
self._counter = 0 # for
self._num_stopped = 0
self._reward_attr = reward_attr
self._metric = metric
self._time_attr = time_attr

if mode == "max":
self._metric_op = 1.
elif mode == "min":
self._metric_op = -1.

def on_trial_add(self, trial_runner, trial):
sizes = np.array([len(b._rungs) for b in self._brackets])
probs = np.e**(sizes - sizes.max())
Expand All @@ -80,15 +89,15 @@ def on_trial_result(self, trial_runner, trial, result):
else:
bracket = self._trial_info[trial.trial_id]
action = bracket.on_result(trial, result[self._time_attr],
result[self._reward_attr])
self._metric_op * result[self._metric])
if action == TrialScheduler.STOP:
self._num_stopped += 1
return action

def on_trial_complete(self, trial_runner, trial, result):
bracket = self._trial_info[trial.trial_id]
bracket.on_result(trial, result[self._time_attr],
result[self._reward_attr])
self._metric_op * result[self._metric])
del self._trial_info[trial.trial_id]

def on_trial_remove(self, trial_runner, trial):
Expand Down
26 changes: 18 additions & 8 deletions python/ray/tune/schedulers/hyperband.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ class HyperBandScheduler(FIFOScheduler):
To use this implementation of HyperBand with Tune, all you need
to do is specify the max length of time a trial can run `max_t`, the time
units `time_attr`, and the name of the reported objective value
`reward_attr`. We automatically determine reasonable values for the other
units `time_attr`, the name of the reported objective value `metric`,
and if `metric` is to be maximized or minimized (`mode`).
We automatically determine reasonable values for the other
HyperBand parameters based on the given values.
For example, to limit trials to 10 minutes and early stop based on the
Expand All @@ -62,9 +63,11 @@ class HyperBandScheduler(FIFOScheduler):
Note that you can pass in something non-temporal such as
`training_iteration` as a measure of progress, the only requirement
is that the attribute should increase monotonically.
reward_attr (str): The training result objective value attribute. As
metric (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value. Stopping
procedures will use this attribute.
mode (str): One of {min, max}. Determines whether objective is
minimizing or maximizing the metric attribute.
max_t (int): max time units per trial. Trials will be stopped after
max_t time units (determined by time_attr) have passed.
The scheduler will terminate trials after this time has passed.
Expand All @@ -74,9 +77,11 @@ class HyperBandScheduler(FIFOScheduler):

def __init__(self,
time_attr="training_iteration",
reward_attr="episode_reward_mean",
metric="episode_reward_mean",
mode="max",
max_t=81):
assert max_t > 0, "Max (time_attr) not valid!"
assert mode in ["min", "max"], "mode must be 'min' or 'max'!"
FIFOScheduler.__init__(self)
self._eta = 3
self._s_max_1 = 5
Expand All @@ -92,9 +97,14 @@ def __init__(self,
# Tracks state for new trial add
self._state = {"bracket": None, "band_idx": 0}
self._num_stopped = 0
self._reward_attr = reward_attr
self._metric = metric
self._time_attr = time_attr

if mode == "max":
self._metric_op = 1.
elif mode == "min":
self._metric_op = -1.

def on_trial_add(self, trial_runner, trial):
"""Adds new trial.
Expand Down Expand Up @@ -173,7 +183,7 @@ def _process_bracket(self, trial_runner, bracket, trial):
bracket.cleanup_full(trial_runner)
return TrialScheduler.STOP

good, bad = bracket.successive_halving(self._reward_attr)
good, bad = bracket.successive_halving(self._metric, self._metric_op)
# kill bad trials
self._num_stopped += len(bad)
for t in bad:
Expand Down Expand Up @@ -322,7 +332,7 @@ def filled(self):

return len(self._live_trials) == self._n

def successive_halving(self, reward_attr):
def successive_halving(self, metric, metric_op):
assert self._halves > 0
self._halves -= 1
self._n /= self._eta
Expand All @@ -332,7 +342,7 @@ def successive_halving(self, reward_attr):
self._r = int(min(self._r, self._max_t_attr - self._cumul_r))
self._cumul_r += self._r
sorted_trials = sorted(
self._live_trials, key=lambda t: self._live_trials[t][reward_attr])
self._live_trials, key=lambda t: metric_op * self._live_trials[t][metric])

good, bad = sorted_trials[-self._n:], sorted_trials[:-self._n]
return good, bad
Expand Down
24 changes: 16 additions & 8 deletions python/ray/tune/schedulers/median_stopping_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ class MedianStoppingRule(FIFOScheduler):
Note that you can pass in something non-temporal such as
`training_iteration` as a measure of progress, the only requirement
is that the attribute should increase monotonically.
reward_attr (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value that
is supposed to increase with time.
metric (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value. Stopping
procedures will use this attribute.
mode (str): One of {min, max}. Determines whether objective is
minimizing or maximizing the metric attribute.
grace_period (float): Only stop trials at least this old in time.
The units are the same as the attribute named by `time_attr`.
min_samples_required (int): Min samples to compute median over.
Expand All @@ -37,7 +39,8 @@ class MedianStoppingRule(FIFOScheduler):

def __init__(self,
time_attr="time_total_s",
reward_attr="episode_reward_mean",
metric="episode_reward_mean",
mode="max",
grace_period=60.0,
min_samples_required=3,
hard_stop=True,
Expand All @@ -48,11 +51,16 @@ def __init__(self,
self._results = collections.defaultdict(list)
self._grace_period = grace_period
self._min_samples_required = min_samples_required
self._reward_attr = reward_attr
self._metric = metric
self._time_attr = time_attr
self._hard_stop = hard_stop
self._verbose = verbose

if mode == "max":
self._metric_op = 1.
elif mode == "min":
self._metric_op = -1.

def on_trial_result(self, trial_runner, trial, result):
"""Callback for early stopping.
Expand Down Expand Up @@ -110,11 +118,11 @@ def _running_result(self, trial, t_max=float("inf")):
results = self._results[trial]
# TODO(ekl) we could do interpolation to be more precise, but for now
# assume len(results) is large and the time diffs are roughly equal
return np.mean([
r[self._reward_attr] for r in results
return self._metric_op * np.mean([
r[self._metric] for r in results
if r[self._time_attr] <= t_max
])

def _best_result(self, trial):
results = self._results[trial]
return max(r[self._reward_attr] for r in results)
return max(self._metric_op * r[self._metric] for r in results)
19 changes: 14 additions & 5 deletions python/ray/tune/schedulers/pbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,11 @@ class PopulationBasedTraining(FIFOScheduler):
Note that you can pass in something non-temporal such as
`training_iteration` as a measure of progress, the only requirement
is that the attribute should increase monotonically.
reward_attr (str): The training result objective value attribute. As
metric (str): The training result objective value attribute. As
with `time_attr`, this may refer to any objective value. Stopping
procedures will use this attribute.
mode (str): One of {min, max}. Determines whether objective is
minimizing or maximizing the metric attribute.
perturbation_interval (float): Models will be considered for
perturbation at this interval of `time_attr`. Note that
perturbation incurs checkpoint overhead, so you shouldn't set this
Expand All @@ -149,7 +151,8 @@ class PopulationBasedTraining(FIFOScheduler):
Example:
>>> pbt = PopulationBasedTraining(
>>> time_attr="training_iteration",
>>> reward_attr="episode_reward_mean",
>>> metric="episode_reward_mean",
>>> mode="max",
>>> perturbation_interval=10, # every 10 `time_attr` units
>>> # (training_iterations in this case)
>>> hyperparam_mutations={
Expand All @@ -165,7 +168,8 @@ class PopulationBasedTraining(FIFOScheduler):

def __init__(self,
time_attr="time_total_s",
reward_attr="episode_reward_mean",
metric="episode_reward_mean",
mode="max",
perturbation_interval=60.0,
hyperparam_mutations={},
resample_probability=0.25,
Expand All @@ -176,7 +180,7 @@ def __init__(self,
"You must specify at least one of `hyperparam_mutations` or "
"`custom_explore_fn` to use PBT.")
FIFOScheduler.__init__(self)
self._reward_attr = reward_attr
self._metric = metric
self._time_attr = time_attr
self._perturbation_interval = perturbation_interval
self._hyperparam_mutations = hyperparam_mutations
Expand All @@ -185,6 +189,11 @@ def __init__(self,
self._custom_explore_fn = custom_explore_fn
self._log_config = log_config

if mode == "max":
self._metric_op = 1.
elif mode == "min":
self._metric_op = -1.

# Metrics
self._num_checkpoints = 0
self._num_perturbations = 0
Expand All @@ -199,7 +208,7 @@ def on_trial_result(self, trial_runner, trial, result):
if time - state.last_perturbation_time < self._perturbation_interval:
return TrialScheduler.CONTINUE # avoid checkpoint overhead

score = result[self._reward_attr]
score = self._metric_op * result[self._metric]
state.last_score = score
state.last_perturbation_time = time
lower_quantile, upper_quantile = self._quantiles()
Expand Down

0 comments on commit 4316f60

Please sign in to comment.