[Tune] Support initial parameters for SkOpt search algorithm #4341

Merged · 15 commits · Mar 17, 2019
Changes from 1 commit
6 changes: 5 additions & 1 deletion python/ray/tune/examples/skopt_example.py
@@ -48,9 +48,13 @@ def easy_objective(config, reporter):
}
}
optimizer = Optimizer([(0, 20), (-100, 100)])
previously_run_params = [[10, 0], [15, -20]]
known_rewards = [-189, -1144]
algo = SkOptSearch(
optimizer, ["width", "height"],
max_concurrent=4,
reward_attr="neg_mean_loss")
reward_attr="neg_mean_loss",
points_to_evaluate=previously_run_params,
evaluated_rewards=known_rewards)
scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
run_experiments(config, search_alg=algo, scheduler=scheduler)
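For context, the example above relies on skopt's ask/tell interface: previously evaluated points and their objective values can be handed to the Optimizer up front via tell(), so its surrogate model starts from known results instead of purely random exploration. A minimal standalone sketch of that mechanism, independent of Ray Tune (parameter values and objective numbers are illustrative only):

# Minimal sketch of the skopt mechanism the example relies on; values are
# illustrative, and skopt treats the numbers passed to tell() as objective
# values to minimise.
from skopt import Optimizer

optimizer = Optimizer([(0, 20), (-100, 100)])

# Previously evaluated (width, height) points, in the same order as the
# dimensions above, plus the objective value observed for each point.
previously_run_params = [[10, 0], [15, -20]]
observed_values = [189, 1144]

# tell() updates the surrogate model with the known results, so the next
# ask() proposes a point informed by them rather than a purely random one.
optimizer.tell(previously_run_params, observed_values)
print(optimizer.ask())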
51 changes: 49 additions & 2 deletions python/ray/tune/suggest/skopt.py
@@ -1,6 +1,8 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numbers
from collections import Iterable

try:
import skopt
@@ -24,10 +26,23 @@ class SkOptSearch(SuggestionAlgorithm):
to 10.
reward_attr (str): The training result objective value attribute.
This refers to an increasing value.
points_to_evaluate (list of lists): A list of trials you'd like to run
first, before sampling from the optimiser; e.g. these could be
parameter configurations you already know work well, to help
the optimiser select good values. Each trial is a list of the
parameter values for that trial, in the order of the dimensions
given to the optimiser (see example below).
evaluated_rewards (list): If you have previously evaluated the
parameters passed in as points_to_evaluate, you can avoid
re-running those trials by passing in the reward attributes
as a list, so the optimiser can be told the results without
needing to re-compute the trial. Must be the same length as
points_to_evaluate. (See skopt_example.py)

Example:
>>> from skopt import Optimizer
>>> optimizer = Optimizer([(0,20),(-100,100)])
>>> current_best_params = [[10, 0], [15, -20]]
>>> config = {
>>> "my_exp": {
>>> "run": "exp",
@@ -39,19 +54,47 @@ class SkOptSearch(SuggestionAlgorithm):
>>> }
>>> algo = SkOptSearch(optimizer,
>>> ["width", "height"], max_concurrent=4,
>>> reward_attr="neg_mean_loss")
>>> reward_attr="neg_mean_loss", points_to_evaluate=current_best_params)
"""

def __init__(self,
optimizer,
parameter_names,
max_concurrent=10,
reward_attr="episode_reward_mean",
points_to_evaluate=None,
evaluated_rewards=None,
**kwargs):
assert skopt is not None, """skopt must be installed!
You can install Skopt with the command:
`pip install scikit-optimize`."""
assert type(max_concurrent) is int and max_concurrent > 0
if points_to_evaluate is None:
points_to_evaluate = []
elif not isinstance(points_to_evaluate[0], (list, tuple)):
points_to_evaluate = [points_to_evaluate]
if not isinstance(points_to_evaluate, list):
raise ValueError(
"`points_to_evaluate` should be a list, but got %s" %
type(points_to_evaluate))
if isinstance(evaluated_rewards, Iterable):
evaluated_rewards = list(evaluated_rewards)
elif isinstance(evaluated_rewards, numbers.Number):
evaluated_rewards = [evaluated_rewards]
self._initial_points = []
if points_to_evaluate and evaluated_rewards:
if not (isinstance(evaluated_rewards, Iterable)
or isinstance(evaluated_rewards, numbers.Number)):
raise ValueError(
"`evaluated_rewards` should be an iterable or a scalar, got %s"
% type(evaluated_rewards))
if len(points_to_evaluate) != len(evaluated_rewards):
raise ValueError(
"`points_to_evaluate` and `evaluated_rewards` should have the same length"
)
optimizer.tell(points_to_evaluate, evaluated_rewards)
elif points_to_evaluate:
self._initial_points = points_to_evaluate
self._max_concurrent = max_concurrent
self._parameters = parameter_names
self._reward_attr = reward_attr
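For readers following the constructor logic above: a single un-nested point is wrapped into a list of points, and a scalar reward into a one-element list, before the lengths are compared and the optimiser is told the results. A small standalone sketch of that normalisation, outside of Ray Tune (values are illustrative):

import numbers

# Illustrative check of the normalisation performed in __init__: a single
# flat point becomes a list of points, and a scalar reward becomes a
# one-element list, before the length check and tell().
points_to_evaluate = [10, 0]              # one point, not nested
if not isinstance(points_to_evaluate[0], (list, tuple)):
    points_to_evaluate = [points_to_evaluate]

evaluated_rewards = -189                  # a single scalar reward
if isinstance(evaluated_rewards, numbers.Number):
    evaluated_rewards = [evaluated_rewards]

assert points_to_evaluate == [[10, 0]]
assert evaluated_rewards == [-189]
assert len(points_to_evaluate) == len(evaluated_rewards)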
@@ -62,7 +105,11 @@ def __init__(self,
def _suggest(self, trial_id):
if self._num_live_trials() >= self._max_concurrent:
return None
suggested_config = self._skopt_opt.ask()
if self._initial_points:
suggested_config = self._initial_points[0]
del self._initial_points[0]
else:
suggested_config = self._skopt_opt.ask()
self._live_trial_mapping[trial_id] = suggested_config
return dict(zip(self._parameters, suggested_config))

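The _suggest change above is the core of the feature: user-supplied points are handed out first, and only once they are exhausted does the search fall back to the optimiser's ask(). A standalone sketch of that flow, using illustrative class and variable names rather than the actual Ray Tune internals:

# Standalone sketch of the suggestion flow added in this diff; names are
# illustrative, not the real Ray Tune classes.
from skopt import Optimizer


class SeededSuggester(object):
    def __init__(self, optimizer, parameter_names, initial_points=None):
        self._skopt_opt = optimizer
        self._parameters = parameter_names
        # Points supplied by the user that should be tried first.
        self._initial_points = list(initial_points or [])

    def suggest(self):
        if self._initial_points:
            # Drain the user-supplied points before consulting the model.
            suggested_config = self._initial_points.pop(0)
        else:
            suggested_config = self._skopt_opt.ask()
        return dict(zip(self._parameters, suggested_config))


suggester = SeededSuggester(
    Optimizer([(0, 20), (-100, 100)]), ["width", "height"],
    initial_points=[[10, 0], [15, -20]])
print(suggester.suggest())  # {'width': 10, 'height': 0}
print(suggester.suggest())  # {'width': 15, 'height': -20}
print(suggester.suggest())  # now sampled from the optimiser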