From c294a91a709a3ae7b0e9c97f8d6f96efe3829e3a Mon Sep 17 00:00:00 2001 From: mark Date: Tue, 12 Mar 2019 16:38:46 +0000 Subject: [PATCH 01/11] Support initial parameters for SkOpt search algorithm --- python/ray/tune/examples/skopt_example.py | 6 ++- python/ray/tune/suggest/skopt.py | 51 ++++++++++++++++++++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/python/ray/tune/examples/skopt_example.py b/python/ray/tune/examples/skopt_example.py index 71ac60951528..5112ebc7ba02 100644 --- a/python/ray/tune/examples/skopt_example.py +++ b/python/ray/tune/examples/skopt_example.py @@ -48,9 +48,13 @@ def easy_objective(config, reporter): } } optimizer = Optimizer([(0, 20), (-100, 100)]) + previously_run_params = [[10, 0], [15, -20]] + known_rewards = [-189, -1144] algo = SkOptSearch( optimizer, ["width", "height"], max_concurrent=4, - reward_attr="neg_mean_loss") + reward_attr="neg_mean_loss", + points_to_evaluate=previously_run_params, + evaluated_rewards=known_rewards) scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss") run_experiments(config, search_alg=algo, scheduler=scheduler) diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index 039c9d015b94..5618b50521b6 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -1,6 +1,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numbers +from collections import Iterable try: import skopt @@ -24,10 +26,23 @@ class SkOptSearch(SuggestionAlgorithm): to 10. reward_attr (str): The training result objective value attribute. This refers to an increasing value. + points_to_evaluate (list of lists): A list of trials you'd like to run + first before sampling from the optimiser, e.g. these could be + parameter configurations you already know work well to help + the optimiser select good values. Each trial is a list of the + parameters of that trial using the order definition given + to the optimiser (see example below) + evaluated_rewards (list): If you have previously evaluated the + parameters passed in as points_to_evaluate you can avoid + re-running those trials by passing in the reward attributes + as a list so the optimiser can be told the results without + needing to re-compute the trial. Must be the same length as + points_to_evaluate. (See skopt_example.py) Example: >>> from skopt import Optimizer >>> optimizer = Optimizer([(0,20),(-100,100)]) + >>> current_best_params = [[10, 0], [15, -20]] >>> config = { >>> "my_exp": { >>> "run": "exp", @@ -39,7 +54,7 @@ class SkOptSearch(SuggestionAlgorithm): >>> } >>> algo = SkOptSearch(optimizer, >>> ["width", "height"], max_concurrent=4, - >>> reward_attr="neg_mean_loss") + >>> reward_attr="neg_mean_loss", points_to_evaluate=current_best_params) """ def __init__(self, @@ -47,11 +62,39 @@ def __init__(self, parameter_names, max_concurrent=10, reward_attr="episode_reward_mean", + points_to_evaluate=None, + evaluated_rewards=None, **kwargs): assert skopt is not None, """skopt must be installed! 
You can install Skopt with the command: `pip install scikit-optimize`.""" assert type(max_concurrent) is int and max_concurrent > 0 + if points_to_evaluate is None: + points_to_evaluate = [] + elif not isinstance(points_to_evaluate[0], (list, tuple)): + points_to_evaluate = [points_to_evaluate] + if not isinstance(points_to_evaluate, list): + raise ValueError( + "`points_to_evaluate` should be a list, but got %s" % + type(points_to_evaluate)) + if isinstance(evaluated_rewards, Iterable): + evaluated_rewards = list(evaluated_rewards) + elif isinstance(evaluated_rewards, numbers.Number): + evaluated_rewards = [evaluated_rewards] + self._initial_points = [] + if points_to_evaluate and evaluated_rewards: + if not (isinstance(evaluated_rewards, Iterable) + or isinstance(evaluated_rewards, numbers.Number)): + raise ValueError( + "`evaluated_rewards` should be an iterable or a scalar, got %s" + % type(evaluated_rewards)) + if len(points_to_evaluate) != len(evaluated_rewards): + raise ValueError( + "`points_to_evaluate` and `evaluated_rewards` should have the same length" + ) + optimizer.tell(points_to_evaluate, evaluated_rewards) + elif points_to_evaluate: + self._initial_points = points_to_evaluate self._max_concurrent = max_concurrent self._parameters = parameter_names self._reward_attr = reward_attr @@ -62,7 +105,11 @@ def __init__(self, def _suggest(self, trial_id): if self._num_live_trials() >= self._max_concurrent: return None - suggested_config = self._skopt_opt.ask() + if self._initial_points: + suggested_config = self._initial_points[0] + del self._initial_points[0] + else: + suggested_config = self._skopt_opt.ask() self._live_trial_mapping[trial_id] = suggested_config return dict(zip(self._parameters, suggested_config)) From 9876e5aff99a2d6c676e3e8af00fa89e944baf01 Mon Sep 17 00:00:00 2001 From: Adi Date: Tue, 12 Mar 2019 14:43:20 -0700 Subject: [PATCH 02/11] initial commit --- python/ray/tune/suggest/skopt.py | 49 +++++++++++++++++++------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index 5618b50521b6..2d3fdbcb1261 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -67,34 +67,21 @@ def __init__(self, **kwargs): assert skopt is not None, """skopt must be installed! 
You can install Skopt with the command:
-                `pip install scikit-optimize`."""
-        assert type(max_concurrent) is int and max_concurrent > 0
-        if points_to_evaluate is None:
-            points_to_evaluate = []
-        elif not isinstance(points_to_evaluate[0], (list, tuple)):
-            points_to_evaluate = [points_to_evaluate]
-        if not isinstance(points_to_evaluate, list):
-            raise ValueError(
-                "`points_to_evaluate` should be a list, but got %s" %
-                type(points_to_evaluate))
-        if isinstance(evaluated_rewards, Iterable):
-            evaluated_rewards = list(evaluated_rewards)
-        elif isinstance(evaluated_rewards, numbers.Number):
-            evaluated_rewards = [evaluated_rewards]
+            `pip install scikit-optimize`."""
+        assert type(max_concurrent) is int and max_concurrent > 0
+        if points_to_evaluate:
+            self._validate_points_to_evaluate(points_to_evaluate,
+                                              len(parameter_names))
+        if evaluated_rewards:
+            self._validate_evaluated_rewards(evaluated_rewards)
         self._initial_points = []
         if points_to_evaluate and evaluated_rewards:
-            if not (isinstance(evaluated_rewards, Iterable)
-                    or isinstance(evaluated_rewards, numbers.Number)):
-                raise ValueError(
-                    "`evaluated_rewards` should be an iterable or a scalar, got %s"
-                    % type(evaluated_rewards))
             if len(points_to_evaluate) != len(evaluated_rewards):
                 raise ValueError(
                     "`points_to_evaluate` and `evaluated_rewards` should have the same length"
                 )
             optimizer.tell(points_to_evaluate, evaluated_rewards)
         elif points_to_evaluate:
-            self._initial_points = points_to_evaluate
+            self._initial_points = points_to_evaluate
         self._max_concurrent = max_concurrent
         self._parameters = parameter_names
         self._reward_attr = reward_attr
@@ -102,6 +89,28 @@ def __init__(self,
         self._live_trial_mapping = {}
         super(SkOptSearch, self).__init__(**kwargs)
 
+    def _validate_points_to_evaluate(self, points, dimension):
+        if not isinstance(points, list):
+            raise TypeError(
+                "`points_to_evaluate` should be a list, but got %s" %
+                type(points))
+        for point in points:
+            if not isinstance(point, list):
+                raise TypeError(
+                    "`points_to_evaluate` should be a list, but got %s" %
+                    type(point))
+            if len(point) != dimension:
+                raise TypeError(
+                    """each point in `points_to_evaluate` should
+                    have the same dimensions as `parameter_names`"""
+                )
+
+    def _validate_evaluated_rewards(self, rewards):
+        if not isinstance(rewards, list):
+            raise TypeError(
+                "`evaluated_rewards` should be a list, but got %s" %
+                type(points_to_evaluate))
+
     def _suggest(self, trial_id):
         if self._num_live_trials() >= self._max_concurrent:
             return None

From f1331f1649ef4c33d2accef2ad70af9c64d7ab7d Mon Sep 17 00:00:00 2001
From: Adi
Date: Tue, 12 Mar 2019 15:45:01 -0700
Subject: [PATCH 05/11] Revert "[rllib] Add end-to-end tests for RNN sequencing
 (#4258)"

This reverts commit 0e77a8f8c08dbb0dab99abe69d1ecf7579267a15.
---
 python/ray/rllib/tests/run_silent.sh |   4 +-
 python/ray/rllib/tests/test_lstm.py  | 217 +--------------------------
 2 files changed, 3 insertions(+), 218 deletions(-)

diff --git a/python/ray/rllib/tests/run_silent.sh b/python/ray/rllib/tests/run_silent.sh
index 0ddca2b9c7be..e0a69dd7b50f 100755
--- a/python/ray/rllib/tests/run_silent.sh
+++ b/python/ray/rllib/tests/run_silent.sh
@@ -6,9 +6,9 @@ SCRIPT=$1
 shift
 
 if [ -x $DIRECTORY/../$SCRIPT ]; then
-    time $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1
+    $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1
 else
-    time python $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1
+    python $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1
 fi
 
 CODE=$?
diff --git a/python/ray/rllib/tests/test_lstm.py b/python/ray/rllib/tests/test_lstm.py index 304478eb85cb..abb9ad0ccb4a 100644 --- a/python/ray/rllib/tests/test_lstm.py +++ b/python/ray/rllib/tests/test_lstm.py @@ -2,20 +2,9 @@ from __future__ import division from __future__ import print_function -import gym -import numpy as np -import pickle import unittest -import tensorflow as tf -import tensorflow.contrib.rnn as rnn -import ray -from ray.rllib.agents.ppo import PPOAgent -from ray.rllib.models import ModelCatalog -from ray.rllib.models.lstm import add_time_dimension, chop_into_sequences -from ray.rllib.models.misc import linear, normc_initializer -from ray.rllib.models.model import Model -from ray.tune.registry import register_env +from ray.rllib.models.lstm import chop_into_sequences class LSTMUtilsTest(unittest.TestCase): @@ -59,209 +48,5 @@ def testDynamicMaxLen(self): self.assertEqual(seq_lens.tolist(), [1, 2]) -class RNNSpyModel(Model): - capture_index = 0 - - def _build_layers_v2(self, input_dict, num_outputs, options): - def spy(sequences, state_in, state_out, seq_lens): - if len(sequences) == 1: - return 0 # don't capture inference inputs - # TF runs this function in an isolated context, so we have to use - # redis to communicate back to our suite - ray.experimental.internal_kv._internal_kv_put( - "rnn_spy_in_{}".format(RNNSpyModel.capture_index), - pickle.dumps({ - "sequences": sequences, - "state_in": state_in, - "state_out": state_out, - "seq_lens": seq_lens - }), - overwrite=True) - RNNSpyModel.capture_index += 1 - return 0 - - features = input_dict["obs"] - cell_size = 3 - last_layer = add_time_dimension(features, self.seq_lens) - - # Setup the LSTM cell - lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True) - self.state_init = [ - np.zeros(lstm.state_size.c, np.float32), - np.zeros(lstm.state_size.h, np.float32) - ] - - # Setup LSTM inputs - if self.state_in: - c_in, h_in = self.state_in - else: - c_in = tf.placeholder( - tf.float32, [None, lstm.state_size.c], name="c") - h_in = tf.placeholder( - tf.float32, [None, lstm.state_size.h], name="h") - self.state_in = [c_in, h_in] - - # Setup LSTM outputs - state_in = rnn.LSTMStateTuple(c_in, h_in) - lstm_out, lstm_state = tf.nn.dynamic_rnn( - lstm, - last_layer, - initial_state=state_in, - sequence_length=self.seq_lens, - time_major=False, - dtype=tf.float32) - - self.state_out = list(lstm_state) - spy_fn = tf.py_func( - spy, [ - last_layer, - self.state_in, - self.state_out, - self.seq_lens, - ], - tf.int64, - stateful=True) - - # Compute outputs - with tf.control_dependencies([spy_fn]): - last_layer = tf.reshape(lstm_out, [-1, cell_size]) - logits = linear(last_layer, num_outputs, "action", - normc_initializer(0.01)) - return logits, last_layer - - -class DebugCounterEnv(gym.Env): - def __init__(self): - self.action_space = gym.spaces.Discrete(2) - self.observation_space = gym.spaces.Box(0, 100, (1, )) - self.i = 0 - - def reset(self): - self.i = 0 - return [self.i] - - def step(self, action): - self.i += 1 - return [self.i], self.i % 3, self.i >= 15, {} - - -class RNNSequencing(unittest.TestCase): - def testSimpleOptimizerSequencing(self): - ModelCatalog.register_custom_model("rnn", RNNSpyModel) - register_env("counter", lambda _: DebugCounterEnv()) - ppo = PPOAgent( - env="counter", - config={ - "num_workers": 0, - "sample_batch_size": 10, - "train_batch_size": 10, - "sgd_minibatch_size": 10, - "vf_share_layers": True, - "simple_optimizer": True, - "num_sgd_iter": 1, - "model": { - "custom_model": "rnn", - 
"max_seq_len": 4, - }, - }) - ppo.train() - ppo.train() - - batch0 = pickle.loads( - ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_0")) - self.assertEqual( - batch0["sequences"].tolist(), - [[[0], [1], [2], [3]], [[4], [5], [6], [7]], [[8], [9], [0], [0]]]) - self.assertEqual(batch0["seq_lens"].tolist(), [4, 4, 2]) - self.assertEqual(batch0["state_in"][0][0].tolist(), [0, 0, 0]) - self.assertEqual(batch0["state_in"][1][0].tolist(), [0, 0, 0]) - self.assertGreater(abs(np.sum(batch0["state_in"][0][1])), 0) - self.assertGreater(abs(np.sum(batch0["state_in"][1][1])), 0) - self.assertTrue( - np.allclose(batch0["state_in"][0].tolist()[1:], - batch0["state_out"][0].tolist()[:-1])) - self.assertTrue( - np.allclose(batch0["state_in"][1].tolist()[1:], - batch0["state_out"][1].tolist()[:-1])) - - batch1 = pickle.loads( - ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_1")) - self.assertEqual(batch1["sequences"].tolist(), [ - [[10], [11], [12], [13]], - [[14], [0], [0], [0]], - [[0], [1], [2], [3]], - [[4], [0], [0], [0]], - ]) - self.assertEqual(batch1["seq_lens"].tolist(), [4, 1, 4, 1]) - self.assertEqual(batch1["state_in"][0][2].tolist(), [0, 0, 0]) - self.assertEqual(batch1["state_in"][1][2].tolist(), [0, 0, 0]) - self.assertGreater(abs(np.sum(batch1["state_in"][0][0])), 0) - self.assertGreater(abs(np.sum(batch1["state_in"][1][0])), 0) - self.assertGreater(abs(np.sum(batch1["state_in"][0][1])), 0) - self.assertGreater(abs(np.sum(batch1["state_in"][1][1])), 0) - self.assertGreater(abs(np.sum(batch1["state_in"][0][3])), 0) - self.assertGreater(abs(np.sum(batch1["state_in"][1][3])), 0) - - def testMinibatchSequencing(self): - ModelCatalog.register_custom_model("rnn", RNNSpyModel) - register_env("counter", lambda _: DebugCounterEnv()) - ppo = PPOAgent( - env="counter", - config={ - "num_workers": 0, - "sample_batch_size": 20, - "train_batch_size": 20, - "sgd_minibatch_size": 10, - "vf_share_layers": True, - "simple_optimizer": False, - "num_sgd_iter": 1, - "model": { - "custom_model": "rnn", - "max_seq_len": 4, - }, - }) - ppo.train() - ppo.train() - - # first epoch: 20 observations get split into 2 minibatches of 8 - # four observations are discarded - batch0 = pickle.loads( - ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_0")) - batch1 = pickle.loads( - ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_1")) - if batch0["sequences"][0][0][0] > batch1["sequences"][0][0][0]: - batch0, batch1 = batch1, batch0 # sort minibatches - self.assertEqual(batch0["seq_lens"].tolist(), [4, 4]) - self.assertEqual(batch1["seq_lens"].tolist(), [4, 3]) - self.assertEqual(batch0["sequences"].tolist(), [ - [[0], [1], [2], [3]], - [[4], [5], [6], [7]], - ]) - self.assertEqual(batch1["sequences"].tolist(), [ - [[8], [9], [10], [11]], - [[12], [13], [14], [0]], - ]) - - # second epoch: 20 observations get split into 2 minibatches of 8 - # four observations are discarded - batch2 = pickle.loads( - ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_2")) - batch3 = pickle.loads( - ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_3")) - if batch2["sequences"][0][0][0] > batch3["sequences"][0][0][0]: - batch2, batch3 = batch3, batch2 - self.assertEqual(batch2["seq_lens"].tolist(), [4, 4]) - self.assertEqual(batch3["seq_lens"].tolist(), [2, 4]) - self.assertEqual(batch2["sequences"].tolist(), [ - [[5], [6], [7], [8]], - [[9], [10], [11], [12]], - ]) - self.assertEqual(batch3["sequences"].tolist(), [ - [[13], [14], [0], [0]], - [[0], [1], [2], [3]], - ]) - - if __name__ == 
"__main__": - ray.init(num_cpus=4) unittest.main(verbosity=2) From 29c174aa3e19ebc45fcadd6746f12120e7922a05 Mon Sep 17 00:00:00 2001 From: mark Date: Wed, 13 Mar 2019 16:19:42 +0000 Subject: [PATCH 06/11] Change input validation to asserts, remove unused imports and modify the skopt example to run both with and without known_rewards --- python/ray/tune/examples/skopt_example.py | 10 ++++++ python/ray/tune/suggest/skopt.py | 40 ++++++----------------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/python/ray/tune/examples/skopt_example.py b/python/ray/tune/examples/skopt_example.py index 5112ebc7ba02..a120a329d474 100644 --- a/python/ray/tune/examples/skopt_example.py +++ b/python/ray/tune/examples/skopt_example.py @@ -58,3 +58,13 @@ def easy_objective(config, reporter): evaluated_rewards=known_rewards) scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss") run_experiments(config, search_alg=algo, scheduler=scheduler) + + # Now run the experiment without known rewards + + algo = SkOptSearch( + optimizer, ["width", "height"], + max_concurrent=4, + reward_attr="neg_mean_loss", + points_to_evaluate=previously_run_params) + scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss") + run_experiments(config, search_alg=algo, scheduler=scheduler) diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index 2d3fdbcb1261..f54b5131603d 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -1,8 +1,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numbers -from collections import Iterable try: import skopt @@ -67,21 +65,19 @@ def __init__(self, **kwargs): assert skopt is not None, """skopt must be installed! 
You can install Skopt with the command: - `pip install scikit-optimize`.""" - assert type(max_concurrent) is int and max_concurrent > 0 + `pip install scikit-optimize`.""" + assert type(max_concurrent) is int and max_concurrent > 0 if points_to_evaluate: - self._validate_points_to_evaluate(points_to_evaluate, len(parameter_names)) + self._validate_points_to_evaluate(points_to_evaluate, + len(parameter_names)) if evaluated_rewards: - self._validate_evaluated_rewards(evaluated_rewards) + assert isinstance(evaluated_rewards, list) self._initial_points = [] if points_to_evaluate and evaluated_rewards: - if len(points_to_evaluate) != len(evaluated_rewards): - raise ValueError( - "`points_to_evaluate` and `evaluated_rewards` should have the same length" - ) + assert len(points_to_evaluate) == len(evaluated_rewards) optimizer.tell(points_to_evaluate, evaluated_rewards) elif points_to_evaluate: - self._initial_points = points_to_evaluate + self._initial_points = points_to_evaluate self._max_concurrent = max_concurrent self._parameters = parameter_names self._reward_attr = reward_attr @@ -90,26 +86,10 @@ def __init__(self, super(SkOptSearch, self).__init__(**kwargs) def _validate_points_to_evaluate(self, points, dimension): - if not isinstance(points, list): - raise TypeError( - "`points_to_evaluate` should be a list, but got %s" % - type(points)) + assert isinstance(points, list) for point in points: - if not isinstance(point, list): - raise TypeError( - "`points_to_evaluate` should be a list, but got %s" % - type(point)) - if len(point) != dimension: - raise TypeError( - """each point in `points_to_evaluate` should - have the same dimensions as `parameter_names`""" - ) - - def _validate_evaluated_rewards(self, rewards): - if not isinstance(rewards, list): - raise TypeError( - "`evaluated_rewards` should be a list, but got %s" % - type(points_to_evaluate)) + assert isinstance(point, list) + assert len(point) == dimension def _suggest(self, trial_id): if self._num_live_trials() >= self._max_concurrent: From dd86c0f98623e916720c02e93ee35c7a28d0656e Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Wed, 13 Mar 2019 12:39:38 -0700 Subject: [PATCH 07/11] revert rllib changes --- python/ray/rllib/tests/run_silent.sh | 4 +- python/ray/rllib/tests/test_lstm.py | 217 ++++++++++++++++++++++++++- 2 files changed, 218 insertions(+), 3 deletions(-) diff --git a/python/ray/rllib/tests/run_silent.sh b/python/ray/rllib/tests/run_silent.sh index e0a69dd7b50f..0ddca2b9c7be 100755 --- a/python/ray/rllib/tests/run_silent.sh +++ b/python/ray/rllib/tests/run_silent.sh @@ -6,9 +6,9 @@ SCRIPT=$1 shift if [ -x $DIRECTORY/../$SCRIPT ]; then - $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1 + time $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1 else - python $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1 + time python $DIRECTORY/../$SCRIPT "$@" >$TMPFILE 2>&1 fi CODE=$? 
diff --git a/python/ray/rllib/tests/test_lstm.py b/python/ray/rllib/tests/test_lstm.py index abb9ad0ccb4a..304478eb85cb 100644 --- a/python/ray/rllib/tests/test_lstm.py +++ b/python/ray/rllib/tests/test_lstm.py @@ -2,9 +2,20 @@ from __future__ import division from __future__ import print_function +import gym +import numpy as np +import pickle import unittest +import tensorflow as tf +import tensorflow.contrib.rnn as rnn -from ray.rllib.models.lstm import chop_into_sequences +import ray +from ray.rllib.agents.ppo import PPOAgent +from ray.rllib.models import ModelCatalog +from ray.rllib.models.lstm import add_time_dimension, chop_into_sequences +from ray.rllib.models.misc import linear, normc_initializer +from ray.rllib.models.model import Model +from ray.tune.registry import register_env class LSTMUtilsTest(unittest.TestCase): @@ -48,5 +59,209 @@ def testDynamicMaxLen(self): self.assertEqual(seq_lens.tolist(), [1, 2]) +class RNNSpyModel(Model): + capture_index = 0 + + def _build_layers_v2(self, input_dict, num_outputs, options): + def spy(sequences, state_in, state_out, seq_lens): + if len(sequences) == 1: + return 0 # don't capture inference inputs + # TF runs this function in an isolated context, so we have to use + # redis to communicate back to our suite + ray.experimental.internal_kv._internal_kv_put( + "rnn_spy_in_{}".format(RNNSpyModel.capture_index), + pickle.dumps({ + "sequences": sequences, + "state_in": state_in, + "state_out": state_out, + "seq_lens": seq_lens + }), + overwrite=True) + RNNSpyModel.capture_index += 1 + return 0 + + features = input_dict["obs"] + cell_size = 3 + last_layer = add_time_dimension(features, self.seq_lens) + + # Setup the LSTM cell + lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True) + self.state_init = [ + np.zeros(lstm.state_size.c, np.float32), + np.zeros(lstm.state_size.h, np.float32) + ] + + # Setup LSTM inputs + if self.state_in: + c_in, h_in = self.state_in + else: + c_in = tf.placeholder( + tf.float32, [None, lstm.state_size.c], name="c") + h_in = tf.placeholder( + tf.float32, [None, lstm.state_size.h], name="h") + self.state_in = [c_in, h_in] + + # Setup LSTM outputs + state_in = rnn.LSTMStateTuple(c_in, h_in) + lstm_out, lstm_state = tf.nn.dynamic_rnn( + lstm, + last_layer, + initial_state=state_in, + sequence_length=self.seq_lens, + time_major=False, + dtype=tf.float32) + + self.state_out = list(lstm_state) + spy_fn = tf.py_func( + spy, [ + last_layer, + self.state_in, + self.state_out, + self.seq_lens, + ], + tf.int64, + stateful=True) + + # Compute outputs + with tf.control_dependencies([spy_fn]): + last_layer = tf.reshape(lstm_out, [-1, cell_size]) + logits = linear(last_layer, num_outputs, "action", + normc_initializer(0.01)) + return logits, last_layer + + +class DebugCounterEnv(gym.Env): + def __init__(self): + self.action_space = gym.spaces.Discrete(2) + self.observation_space = gym.spaces.Box(0, 100, (1, )) + self.i = 0 + + def reset(self): + self.i = 0 + return [self.i] + + def step(self, action): + self.i += 1 + return [self.i], self.i % 3, self.i >= 15, {} + + +class RNNSequencing(unittest.TestCase): + def testSimpleOptimizerSequencing(self): + ModelCatalog.register_custom_model("rnn", RNNSpyModel) + register_env("counter", lambda _: DebugCounterEnv()) + ppo = PPOAgent( + env="counter", + config={ + "num_workers": 0, + "sample_batch_size": 10, + "train_batch_size": 10, + "sgd_minibatch_size": 10, + "vf_share_layers": True, + "simple_optimizer": True, + "num_sgd_iter": 1, + "model": { + "custom_model": "rnn", + 
"max_seq_len": 4, + }, + }) + ppo.train() + ppo.train() + + batch0 = pickle.loads( + ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_0")) + self.assertEqual( + batch0["sequences"].tolist(), + [[[0], [1], [2], [3]], [[4], [5], [6], [7]], [[8], [9], [0], [0]]]) + self.assertEqual(batch0["seq_lens"].tolist(), [4, 4, 2]) + self.assertEqual(batch0["state_in"][0][0].tolist(), [0, 0, 0]) + self.assertEqual(batch0["state_in"][1][0].tolist(), [0, 0, 0]) + self.assertGreater(abs(np.sum(batch0["state_in"][0][1])), 0) + self.assertGreater(abs(np.sum(batch0["state_in"][1][1])), 0) + self.assertTrue( + np.allclose(batch0["state_in"][0].tolist()[1:], + batch0["state_out"][0].tolist()[:-1])) + self.assertTrue( + np.allclose(batch0["state_in"][1].tolist()[1:], + batch0["state_out"][1].tolist()[:-1])) + + batch1 = pickle.loads( + ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_1")) + self.assertEqual(batch1["sequences"].tolist(), [ + [[10], [11], [12], [13]], + [[14], [0], [0], [0]], + [[0], [1], [2], [3]], + [[4], [0], [0], [0]], + ]) + self.assertEqual(batch1["seq_lens"].tolist(), [4, 1, 4, 1]) + self.assertEqual(batch1["state_in"][0][2].tolist(), [0, 0, 0]) + self.assertEqual(batch1["state_in"][1][2].tolist(), [0, 0, 0]) + self.assertGreater(abs(np.sum(batch1["state_in"][0][0])), 0) + self.assertGreater(abs(np.sum(batch1["state_in"][1][0])), 0) + self.assertGreater(abs(np.sum(batch1["state_in"][0][1])), 0) + self.assertGreater(abs(np.sum(batch1["state_in"][1][1])), 0) + self.assertGreater(abs(np.sum(batch1["state_in"][0][3])), 0) + self.assertGreater(abs(np.sum(batch1["state_in"][1][3])), 0) + + def testMinibatchSequencing(self): + ModelCatalog.register_custom_model("rnn", RNNSpyModel) + register_env("counter", lambda _: DebugCounterEnv()) + ppo = PPOAgent( + env="counter", + config={ + "num_workers": 0, + "sample_batch_size": 20, + "train_batch_size": 20, + "sgd_minibatch_size": 10, + "vf_share_layers": True, + "simple_optimizer": False, + "num_sgd_iter": 1, + "model": { + "custom_model": "rnn", + "max_seq_len": 4, + }, + }) + ppo.train() + ppo.train() + + # first epoch: 20 observations get split into 2 minibatches of 8 + # four observations are discarded + batch0 = pickle.loads( + ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_0")) + batch1 = pickle.loads( + ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_1")) + if batch0["sequences"][0][0][0] > batch1["sequences"][0][0][0]: + batch0, batch1 = batch1, batch0 # sort minibatches + self.assertEqual(batch0["seq_lens"].tolist(), [4, 4]) + self.assertEqual(batch1["seq_lens"].tolist(), [4, 3]) + self.assertEqual(batch0["sequences"].tolist(), [ + [[0], [1], [2], [3]], + [[4], [5], [6], [7]], + ]) + self.assertEqual(batch1["sequences"].tolist(), [ + [[8], [9], [10], [11]], + [[12], [13], [14], [0]], + ]) + + # second epoch: 20 observations get split into 2 minibatches of 8 + # four observations are discarded + batch2 = pickle.loads( + ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_2")) + batch3 = pickle.loads( + ray.experimental.internal_kv._internal_kv_get("rnn_spy_in_3")) + if batch2["sequences"][0][0][0] > batch3["sequences"][0][0][0]: + batch2, batch3 = batch3, batch2 + self.assertEqual(batch2["seq_lens"].tolist(), [4, 4]) + self.assertEqual(batch3["seq_lens"].tolist(), [2, 4]) + self.assertEqual(batch2["sequences"].tolist(), [ + [[5], [6], [7], [8]], + [[9], [10], [11], [12]], + ]) + self.assertEqual(batch3["sequences"].tolist(), [ + [[13], [14], [0], [0]], + [[0], [1], [2], [3]], + ]) + + if __name__ == 
"__main__": + ray.init(num_cpus=4) unittest.main(verbosity=2) From 44774d4a965d4a21d635b4e53cfe3ed849ddcf47 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Wed, 13 Mar 2019 13:02:52 -0700 Subject: [PATCH 08/11] better error --- python/ray/tune/suggest/skopt.py | 58 +++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index f54b5131603d..2a906cfa113d 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -10,6 +10,34 @@ from ray.tune.suggest.suggestion import SuggestionAlgorithm +def _validate_warmstart(names, points, rewards): + dimension = len(names) + if points: + if not isinstance(points, list): + raise TypeError( + "points_to_evaluate expected to be a list, got {}.".format( + type(points))) + for point in points: + if not isinstance(point, list): + raise TypeError( + "points_to_evaluate expected to include list, got {}.". + format(type(point))) + + if not len(points) == dimension: + raise ValueError( + "points_to_evaluate expected to be len {}, got {}.".format( + dimension, len(points))) + if points and rewards: + if not isinstance(rewards, list): + raise TypeError( + "evaluated_rewards expected to be a list, got {}.".format( + type(rewards))) + if not len(rewards) == dimension: + raise ValueError( + "evaluated_rewards expected to be len {}, got {}.".format( + dimension, len(rewards))) + + class SkOptSearch(SuggestionAlgorithm): """A wrapper around skopt to provide trial suggestions. @@ -24,18 +52,17 @@ class SkOptSearch(SuggestionAlgorithm): to 10. reward_attr (str): The training result objective value attribute. This refers to an increasing value. - points_to_evaluate (list of lists): A list of trials you'd like to run + points_to_evaluate (list of lists): A list of points you'd like to run first before sampling from the optimiser, e.g. these could be parameter configurations you already know work well to help - the optimiser select good values. Each trial is a list of the - parameters of that trial using the order definition given - to the optimiser (see example below) + the optimiser select good values. Each point is a list of the + parameters using the order definition given by parameter_names. evaluated_rewards (list): If you have previously evaluated the parameters passed in as points_to_evaluate you can avoid re-running those trials by passing in the reward attributes as a list so the optimiser can be told the results without needing to re-compute the trial. Must be the same length as - points_to_evaluate. (See skopt_example.py) + points_to_evaluate. 
(See tune/examples/skopt_example.py) Example: >>> from skopt import Optimizer @@ -51,8 +78,10 @@ class SkOptSearch(SuggestionAlgorithm): >>> } >>> } >>> algo = SkOptSearch(optimizer, - >>> ["width", "height"], max_concurrent=4, - >>> reward_attr="neg_mean_loss", points_to_evaluate=current_best_params) + >>> ["width", "height"], + >>> max_concurrent=4, + >>> reward_attr="neg_mean_loss", + >>> points_to_evaluate=current_best_params) """ def __init__(self, @@ -67,14 +96,11 @@ def __init__(self, You can install Skopt with the command: `pip install scikit-optimize`.""" assert type(max_concurrent) is int and max_concurrent > 0 - if points_to_evaluate: - self._validate_points_to_evaluate(points_to_evaluate, - len(parameter_names)) - if evaluated_rewards: - assert isinstance(evaluated_rewards, list) + _validate_warmstart(parameter_names, points_to_evaluate, + evaluated_rewards) + self._initial_points = [] if points_to_evaluate and evaluated_rewards: - assert len(points_to_evaluate) == len(evaluated_rewards) optimizer.tell(points_to_evaluate, evaluated_rewards) elif points_to_evaluate: self._initial_points = points_to_evaluate @@ -85,12 +111,6 @@ def __init__(self, self._live_trial_mapping = {} super(SkOptSearch, self).__init__(**kwargs) - def _validate_points_to_evaluate(self, points, dimension): - assert isinstance(points, list) - for point in points: - assert isinstance(point, list) - assert len(point) == dimension - def _suggest(self, trial_id): if self._num_live_trials() >= self._max_concurrent: return None From e8bcfcd6126628a41a286c1c8859aa004052f7a1 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Wed, 13 Mar 2019 13:22:47 -0700 Subject: [PATCH 09/11] better errors --- python/ray/tune/suggest/skopt.py | 36 ++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index 2a906cfa113d..aa4a9dd85592 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -10,32 +10,36 @@ from ray.tune.suggest.suggestion import SuggestionAlgorithm -def _validate_warmstart(names, points, rewards): - dimension = len(names) - if points: - if not isinstance(points, list): +def _validate_warmstart(parameter_names, points_to_evaluate, + evaluated_rewards): + dimension = len(parameter_names) + if points_to_evaluate: + if not isinstance(points_to_evaluate, list): raise TypeError( "points_to_evaluate expected to be a list, got {}.".format( - type(points))) - for point in points: + type(points_to_evaluate))) + for point in points_to_evaluate: if not isinstance(point, list): raise TypeError( "points_to_evaluate expected to include list, got {}.". 
- format(type(point))) + format(point)) - if not len(points) == dimension: + if not len(points_to_evaluate) == dimension: raise ValueError( - "points_to_evaluate expected to be len {}, got {}.".format( - dimension, len(points))) - if points and rewards: - if not isinstance(rewards, list): + "Dim of points_to_evaluate {}".format(points_to_evaluate) + + " and parameter_names {}".format(parameter_names) + + " do not match.") + + if points_to_evaluate and evaluated_rewards: + if not isinstance(evaluated_rewards, list): raise TypeError( "evaluated_rewards expected to be a list, got {}.".format( - type(rewards))) - if not len(rewards) == dimension: + type(evaluated_rewards))) + if not len(evaluated_rewards) == dimension: raise ValueError( - "evaluated_rewards expected to be len {}, got {}.".format( - dimension, len(rewards))) + "Dim of evaluated_rewards {}".format(evaluated_rewards) + + " and parameter_names {}".format(parameter_names) + + " do not match.") class SkOptSearch(SuggestionAlgorithm): From faca2c124929de74bdf09fcb323bead0dccf4cdc Mon Sep 17 00:00:00 2001 From: mark Date: Fri, 15 Mar 2019 14:47:50 +0000 Subject: [PATCH 10/11] Length of points_to_evaluate (number of experiments) was being compared to length of parameter_names (number of parameter dimensions). Fixed this comparion and also fixed the points_to_evaluate and evaluated_rewards comparison --- python/ray/tune/suggest/skopt.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index aa4a9dd85592..06aa753033d1 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -12,7 +12,6 @@ def _validate_warmstart(parameter_names, points_to_evaluate, evaluated_rewards): - dimension = len(parameter_names) if points_to_evaluate: if not isinstance(points_to_evaluate, list): raise TypeError( @@ -24,7 +23,7 @@ def _validate_warmstart(parameter_names, points_to_evaluate, "points_to_evaluate expected to include list, got {}.". format(point)) - if not len(points_to_evaluate) == dimension: + if not len(points_to_evaluate[0]) == len(parameter_names): raise ValueError( "Dim of points_to_evaluate {}".format(points_to_evaluate) + " and parameter_names {}".format(parameter_names) + @@ -35,10 +34,10 @@ def _validate_warmstart(parameter_names, points_to_evaluate, raise TypeError( "evaluated_rewards expected to be a list, got {}.".format( type(evaluated_rewards))) - if not len(evaluated_rewards) == dimension: + if not len(evaluated_rewards) == len(points_to_evaluate): raise ValueError( "Dim of evaluated_rewards {}".format(evaluated_rewards) + - " and parameter_names {}".format(parameter_names) + + " and points_to_evaluate {}".format(points_to_evaluate) + " do not match.") From 76b1598325976c9523204583eeee7c6d96a0d7ce Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Fri, 15 Mar 2019 22:59:15 -0700 Subject: [PATCH 11/11] more comprehensive check --- python/ray/tune/suggest/skopt.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index 06aa753033d1..cff340f418e4 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -23,11 +23,10 @@ def _validate_warmstart(parameter_names, points_to_evaluate, "points_to_evaluate expected to include list, got {}.". 
format(point)) - if not len(points_to_evaluate[0]) == len(parameter_names): - raise ValueError( - "Dim of points_to_evaluate {}".format(points_to_evaluate) + - " and parameter_names {}".format(parameter_names) + - " do not match.") + if not len(point) == len(parameter_names): + raise ValueError("Dim of point {}".format(point) + + " and parameter_names {}".format( + parameter_names) + " do not match.") if points_to_evaluate and evaluated_rewards: if not isinstance(evaluated_rewards, list):
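
Usage sketch (warm start, as of PATCH 11/11). The snippet below pieces together how the API added by this series is driven, following tune/examples/skopt_example.py as patched above. The search space, parameter names, warm-start points, rewards, reward_attr, and the config/scheduler skeleton are taken from the patches and the SkOptSearch docstring; the objective body, the fixed num_samples value, and the ray.init() call are illustrative assumptions, since the diffs only show the example file's changed region.

    import ray
    from ray.tune import register_trainable, run_experiments
    from ray.tune.schedulers import AsyncHyperBandScheduler
    from ray.tune.suggest.skopt import SkOptSearch
    from skopt import Optimizer


    def easy_objective(config, reporter):
        # Stand-in objective body: report the metric named by reward_attr.
        loss = (config["height"] - 14)**2 + abs(config["width"] - 3)
        reporter(neg_mean_loss=-loss)


    register_trainable("exp", easy_objective)

    # Experiment skeleton from the SkOptSearch docstring.
    config = {
        "my_exp": {
            "run": "exp",
            "num_samples": 10,  # the example gates this on a smoke_test flag
            "stop": {"training_iteration": 100},
        }
    }

    # One skopt dimension per parameter name, in the same order as the
    # ["width", "height"] list passed to SkOptSearch.
    optimizer = Optimizer([(0, 20), (-100, 100)])

    # Two previously evaluated points and their observed rewards.
    previously_run_params = [[10, 0], [15, -20]]
    known_rewards = [-189, -1144]

    algo = SkOptSearch(
        optimizer, ["width", "height"],
        max_concurrent=4,
        reward_attr="neg_mean_loss",
        points_to_evaluate=previously_run_params,
        evaluated_rewards=known_rewards)

    ray.init()
    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
    run_experiments(config, search_alg=algo, scheduler=scheduler)

When evaluated_rewards is supplied, __init__ hands both lists straight to skopt's Optimizer.tell(), so the warm-start trials are never re-run; when it is omitted, the points are queued on self._initial_points and _suggest() replays them in order before falling back to optimizer.ask().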