You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import tensorflow as tf
import ray
from ray import tune
from ray.rllib.agents.trainer_template import build_trainer
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.tf_policy_template import build_tf_policy
def policy_gradient_loss(policy, batch_tensors):
    """Return the policy-gradient loss for one batch.

    The loss is the negated mean of the action log-probabilities
    (taken from ``policy.action_dist``) multiplied by the rewards.

    Args:
        policy: Object exposing ``action_dist.logp(actions)``.
        batch_tensors: Container indexed by ``SampleBatch`` column names;
            the ``ACTIONS`` and ``REWARDS`` columns are read.

    Returns:
        The value of ``-tf.reduce_mean(logp(actions) * rewards)``.
    """
    actions = batch_tensors[SampleBatch.ACTIONS]
    rewards = batch_tensors[SampleBatch.REWARDS]
    return -tf.reduce_mean(policy.action_dist.logp(actions) * rewards)
# Build a policy class from the loss function above. Resulting type:
# <class 'ray.rllib.policy.tf_policy_template.MyTFPolicy'>
MyTFPolicy = build_tf_policy(
    name="MyTFPolicy",
    loss_fn=policy_gradient_loss,
)
# Build a trainer class that uses MyTFPolicy as its default policy.
# Resulting type:
# <class 'ray.rllib.agents.trainer_template.MyCustomTrainer'>
MyTrainer = build_trainer(
    name="MyCustomTrainer",
    default_policy=MyTFPolicy,
)
# Initialize Ray, then run the custom trainer through Tune.
# NOTE: this config has no "lr" entry; the traceback in this report ends
# with KeyError: 'lr' raised from TFPolicy.optimizer.
ray.init()
tune.run(
    MyTrainer,
    config={
        "env": "CartPole-v0",
        "num_workers": 2,
    },
)
Full traceback:
Traceback (most recent call last):
File "/home/ubuntu/ray/python/ray/tune/trial_runner.py", line 446, in _process_trial
result = self.trial_executor.fetch_result(trial)
File "/home/ubuntu/ray/python/ray/tune/ray_trial_executor.py", line 316, in fetch_result
result = ray.get(trial_future[0])
File "/home/ubuntu/ray/python/ray/worker.py", line 2189, in get
raise value
ray.exceptions.RayTaskError: ray_MyCustomTrainer:train() (pid=8300, host=...)
File "/home/ubuntu/ray/python/ray/rllib/agents/trainer.py", line 311, in __init__
Trainable.__init__(self, config, logger_creator)
File "/home/ubuntu/ray/python/ray/tune/trainable.py", line 88, in __init__
self._setup(copy.deepcopy(self.config))
File "/home/ubuntu/ray/python/ray/rllib/agents/trainer.py", line 424, in _setup
self._init(self.config, self.env_creator)
File "/home/ubuntu/ray/python/ray/rllib/agents/trainer_template.py", line 63, in _init
env_creator, policy)
File "/home/ubuntu/ray/python/ray/rllib/agents/trainer.py", line 622, in make_local_evaluator
extra_config or {}))
File "/home/ubuntu/ray/python/ray/rllib/agents/trainer.py", line 847, in _make_evaluator
_fake_sampler=config.get("_fake_sampler", False))
File "/home/ubuntu/ray/python/ray/rllib/evaluation/policy_evaluator.py", line 321, in __init__
self._build_policy_map(policy_dict, policy_config)
File "/home/ubuntu/ray/python/ray/rllib/evaluation/policy_evaluator.py", line 727, in _build_policy_map
policy_map[name] = cls(obs_space, act_space, merged_conf)
File "/home/ubuntu/ray/python/ray/rllib/policy/tf_policy_template.py", line 109, in __init__
existing_inputs=existing_inputs)
File "/home/ubuntu/ray/python/ray/rllib/policy/dynamic_tf_policy.py", line 159, in __init__
self._initialize_loss()
File "/home/ubuntu/ray/python/ray/rllib/policy/dynamic_tf_policy.py", line 272, in _initialize_loss
TFPolicy._initialize_loss(self, loss, loss_inputs)
File "/home/ubuntu/ray/python/ray/rllib/policy/tf_policy.py", line 154, in _initialize_loss
self._optimizer = self.optimizer()
File "/home/ubuntu/ray/python/ray/rllib/policy/tf_policy_template.py", line 129, in optimizer
return TFPolicy.optimizer(self)
File "/home/ubuntu/ray/python/ray/rllib/policy/tf_policy.py", line 287, in optimizer
return tf.train.AdamOptimizer(self.config["lr"])
KeyError: 'lr'
System information
Problem Description:
The sample example in the "Building Policies in TensorFlow" section of the RLlib concepts documentation (https://ray.readthedocs.io/en/latest/rllib-concepts.html#building-policies-in-tensorflow) raises `KeyError: 'lr'` because the trainer config does not contain a learning rate for the optimizer.
Source code:
Full traceback:
Possible solution:
Edit the example in the docs:
Another fix would be to edit `COMMON_CONFIG` in `ray.rllib.agents.trainer` to include the learning rate key (`lr`).
The text was updated successfully, but these errors were encountered: