[rllib] Update concepts docs and add "Building Policies in Torch/TensorFlow" section #4821

Merged · 12 commits · May 27, 2019
4 changes: 2 additions & 2 deletions doc/source/index.rst
@@ -98,10 +98,10 @@ Ray comes with libraries that accelerate deep learning and reinforcement learning
    rllib-models.rst
    rllib-algorithms.rst
    rllib-offline.rst
-   rllib-dev.rst
    rllib-concepts.rst
-   rllib-package-ref.rst
    rllib-examples.rst
+   rllib-dev.rst
+   rllib-package-ref.rst

.. toctree::
:maxdepth: 1
374 changes: 348 additions & 26 deletions doc/source/rllib-concepts.rst

Large diffs are not rendered by default.
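Most of this PR lives in rllib-concepts.rst, which is not rendered here. For orientation, a heavily hedged sketch of the function-based TensorFlow policy pattern that the new "Building Policies in TensorFlow" section introduces (the import path, the batch field names, and the `policy.action_dist` attribute below are assumptions, not copied from the doc):

```python
import tensorflow as tf

# Assumed import path for the TF policy builder; it may differ in this release.
from ray.rllib.policy.tf_policy_template import build_tf_policy


def policy_gradient_loss(policy, batch_tensors):
    # Vanilla policy-gradient loss: log-prob of the sampled actions weighted
    # by the (undiscounted) rewards. The field names are assumptions.
    actions = batch_tensors["actions"]
    rewards = batch_tensors["rewards"]
    return -tf.reduce_mean(policy.action_dist.logp(actions) * rewards)


# Everything not specified (model, action sampling, postprocessing) falls
# back to the builder's defaults.
MyTFPolicy = build_tf_policy(
    name="MyTFPolicy",
    loss_fn=policy_gradient_loss)
```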

2 changes: 1 addition & 1 deletion doc/source/rllib-env.rst
@@ -275,7 +275,7 @@ Implementing a centralized critic that takes as input the observations and actions

.. code-block:: python

-    def postprocess_trajectory(self, sample_batch, other_agent_batches, episode):
+    def postprocess_trajectory(policy, sample_batch, other_agent_batches, episode):
         agents = ["agent_1", "agent_2", "agent_3"]  # simple example of 3 agents
         global_obs_batch = np.stack(
             [other_agent_batches[agent_id][1]["obs"] for agent_id in agents],
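The hunk above shows only the top of the centralized-critic postprocessing example. A rough sketch of how such a hook could continue under the new function-style signature (the `global_obs` key and the return behavior are illustrative assumptions, not part of this diff):

```python
import numpy as np


def postprocess_trajectory(policy, sample_batch, other_agent_batches, episode):
    agents = ["agent_1", "agent_2", "agent_3"]  # simple example of 3 agents
    # other_agent_batches maps agent id -> (policy, SampleBatch); index [1]
    # selects the batch collected for that agent during the episode.
    global_obs_batch = np.stack(
        [other_agent_batches[agent_id][1]["obs"] for agent_id in agents],
        axis=1)
    # Attach the stacked observations so the loss can feed a centralized
    # critic; "global_obs" is an illustrative key name.
    sample_batch["global_obs"] = global_obs_batch
    return sample_batch
```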
20 changes: 12 additions & 8 deletions doc/source/rllib.rst
@@ -5,8 +5,7 @@ RLlib is an open-source library for reinforcement learning that offers both high

.. image:: rllib-stack.svg

-Learn more about RLlib's design by reading the `ICML paper <https://arxiv.org/abs/1712.09381>`__.
-To get started, take a look over the `custom env example <https://github.com/ray-project/ray/blob/master/python/ray/rllib/examples/custom_env.py>`__ and the `API documentation <rllib-training.html>`__.
+To get started, take a look over the `custom env example <https://github.com/ray-project/ray/blob/master/python/ray/rllib/examples/custom_env.py>`__ and the `API documentation <rllib-training.html>`__. If you're looking to develop custom algorithms with RLlib, also check out `concepts and custom algorithms <rllib-concepts.html>`__.

Installation
------------
@@ -96,12 +95,17 @@ Offline Datasets
* `Input API <rllib-offline.html#input-api>`__
* `Output API <rllib-offline.html#output-api>`__

-Concepts
---------
-* `Policies <rllib-concepts.html>`__
-* `Policy Evaluation <rllib-concepts.html#policy-evaluation>`__
-* `Policy Optimization <rllib-concepts.html#policy-optimization>`__
-* `Trainers <rllib-concepts.html#trainers>`__
+Concepts and Custom Algorithms
+------------------------------
+* `Policies <rllib-concepts.html>`__
+
+  - `Building Policies in TensorFlow <rllib-concepts.html#building-policies-in-tensorflow>`__
+
+  - `Building Policies in PyTorch <rllib-concepts.html#building-policies-in-pytorch>`__
+
+* `Policy Evaluation <rllib-concepts.html#policy-evaluation>`__
+* `Policy Optimization <rllib-concepts.html#policy-optimization>`__
+* `Trainers <rllib-concepts.html#trainers>`__

Examples
--------
2 changes: 1 addition & 1 deletion python/ray/rllib/agents/pg/pg.py
@@ -29,7 +29,7 @@ def get_policy_class(config):


PGTrainer = build_trainer(
name="PG",
name="PGTrainer",
default_config=DEFAULT_CONFIG,
default_policy=PGTFPolicy,
get_policy_class=get_policy_class)
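The `PGTrainer` definition above is the smallest real example of the `build_trainer` pattern this PR documents. A rough sketch of assembling a custom algorithm the same way (here `MyTFPolicy` is a hypothetical placeholder for a policy class you would supply yourself, e.g. one produced by the policy builders the new concepts section describes):

```python
from ray.rllib.agents.trainer import COMMON_CONFIG
from ray.rllib.agents.trainer_template import build_trainer

# MY_CONFIG extends the shared defaults; "lr" is a standard RLlib config key.
MY_CONFIG = dict(COMMON_CONFIG, **{"lr": 0.0005})

MyCustomTrainer = build_trainer(
    name="MyCustomTrainer",     # names must end in "Trainer" after this PR
    default_config=MY_CONFIG,
    default_policy=MyTFPolicy)  # placeholder policy class, not defined here
```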
6 changes: 3 additions & 3 deletions python/ray/rllib/agents/ppo/ppo.py
@@ -63,7 +63,7 @@
# yapf: enable


-def make_optimizer(local_evaluator, remote_evaluators, config):
+def choose_policy_optimizer(local_evaluator, remote_evaluators, config):
     if config["simple_optimizer"]:
         return SyncSamplesOptimizer(
             local_evaluator,
@@ -155,10 +155,10 @@ def validate_config(config):


PPOTrainer = build_trainer(
name="PPO",
name="PPOTrainer",
default_config=DEFAULT_CONFIG,
default_policy=PPOTFPolicy,
make_policy_optimizer=make_optimizer,
make_policy_optimizer=choose_policy_optimizer,
validate_config=validate_config,
after_optimizer_step=update_kl,
before_train_step=warn_about_obs_filter,
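The renamed `choose_policy_optimizer` is passed to `build_trainer` as the `make_policy_optimizer` hook, as the `PPOTrainer` call above shows. A minimal sketch of such a hook, assuming `SyncSamplesOptimizer` accepts `train_batch_size` as a keyword argument:

```python
from ray.rllib.optimizers import SyncSamplesOptimizer


def my_policy_optimizer(local_evaluator, remote_evaluators, config):
    # The hook receives the local and remote evaluators plus the merged
    # trainer config, and returns the policy optimizer the trainer will use.
    return SyncSamplesOptimizer(
        local_evaluator,
        remote_evaluators,
        train_batch_size=config["train_batch_size"])
```

Passing `make_policy_optimizer=my_policy_optimizer` to `build_trainer` would wire the hook in, exactly as `choose_policy_optimizer` is wired into `PPOTrainer` above.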
13 changes: 6 additions & 7 deletions python/ray/rllib/agents/trainer_template.py
@@ -2,7 +2,7 @@
from __future__ import division
from __future__ import print_function

-from ray.rllib.agents.trainer import Trainer
+from ray.rllib.agents.trainer import Trainer, COMMON_CONFIG
from ray.rllib.optimizers import SyncSamplesOptimizer
from ray.rllib.utils.annotations import override, DeveloperAPI

@@ -44,13 +44,12 @@ def build_trainer(name,
a Trainer instance that uses the specified args.
"""

-    if name.endswith("Trainer"):
-        raise ValueError("Algorithm name should not include *Trainer suffix",
-                         name)
+    if not name.endswith("Trainer"):
+        raise ValueError("Algorithm name should have *Trainer suffix", name)

     class trainer_cls(Trainer):
         _name = name
-        _default_config = default_config or Trainer.COMMON_CONFIG
+        _default_config = default_config or COMMON_CONFIG
         _policy = default_policy

         def _init(self, config, env_creator):
@@ -92,6 +91,6 @@ def _train(self):
after_train_result(self, res)
return res

-    trainer_cls.__name__ = name + "Trainer"
-    trainer_cls.__qualname__ = name + "Trainer"
+    trainer_cls.__name__ = name
+    trainer_cls.__qualname__ = name
     return trainer_cls
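To make the inverted check and the new naming behavior concrete, a small sketch (again `MyTFPolicy` is only a placeholder policy class; only the naming logic is exercised here):

```python
from ray.rllib.agents.trainer_template import build_trainer

try:
    build_trainer(name="MyAlgo", default_policy=MyTFPolicy)
except ValueError:
    pass  # rejected: the name no longer carries the "Trainer" suffix

MyAlgoTrainer = build_trainer(name="MyAlgoTrainer", default_policy=MyTFPolicy)
assert MyAlgoTrainer.__name__ == "MyAlgoTrainer"  # suffix is no longer appended
```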