ray-project · sven1977 · Oct 27, 2021 · Oct 26, 2021 · Oct 26, 2021 · Oct 27, 2021
@@ -14,6 +14,7 @@
 import psutil
 
 if TYPE_CHECKING:
+    from ray.rllib.agents.trainer import Trainer
     from ray.rllib.evaluation import RolloutWorker
 
 
@@ -35,28 +36,21 @@ def __init__(self, legacy_callbacks_dict: Dict[str, callable] = None):
                 "a class extending rllib.agents.callbacks.DefaultCallbacks")
         self.legacy_callbacks = legacy_callbacks_dict or {}
 
-    def on_episode_start(self,
-                         *,
-                         worker: "RolloutWorker",
-                         base_env: BaseEnv,
+    def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv,
                          policies: Dict[PolicyID, Policy],
-                         episode: MultiAgentEpisode,
-                         env_index: Optional[int] = None,
-                         **kwargs) -> None:
+                         episode: MultiAgentEpisode, **kwargs) -> None:
         """Callback run on the rollout worker before each episode starts.
 
         Args:
-            worker (RolloutWorker): Reference to the current rollout worker.
-            base_env (BaseEnv): BaseEnv running the episode. The underlying
+            worker: Reference to the current rollout worker.
+            base_env: BaseEnv running the episode. The underlying
                 env object can be gotten by calling base_env.get_unwrapped().
-            policies (dict): Mapping of policy id to policy objects. In single
+            policies: Mapping of policy id to policy objects. In single
                 agent mode there will only be a single "default" policy.
-            episode (MultiAgentEpisode): Episode object which contains episode
+            episode: Episode object which contains episode
                 state. You can use the `episode.user_data` dict to store
                 temporary data, and `episode.custom_metrics` to store custom
                 metrics for the episode.
-            env_index (EnvID): Obsoleted: The ID of the environment, which the
-                episode belongs to.
             kwargs: Forward compatibility placeholder.
         """
 
@@ -73,7 +67,6 @@ def on_episode_step(self,
                         base_env: BaseEnv,
                         policies: Optional[Dict[PolicyID, Policy]] = None,
                         episode: MultiAgentEpisode,
-                        env_index: Optional[int] = None,
                         **kwargs) -> None:
         """Runs on each episode step.
 
@@ -88,8 +81,6 @@ def on_episode_step(self,
                 state. You can use the `episode.user_data` dict to store
                 temporary data, and `episode.custom_metrics` to store custom
                 metrics for the episode.
-            env_index (EnvID): Obsoleted: The ID of the environment, which the
-                episode belongs to.
             kwargs: Forward compatibility placeholder.
         """
 
@@ -99,14 +90,9 @@ def on_episode_step(self,
                 "episode": episode
             })
 
-    def on_episode_end(self,
-                       *,
-                       worker: "RolloutWorker",
-                       base_env: BaseEnv,
+    def on_episode_end(self, *, worker: "RolloutWorker", base_env: BaseEnv,
                        policies: Dict[PolicyID, Policy],
-                       episode: MultiAgentEpisode,
-                       env_index: Optional[int] = None,
-                       **kwargs) -> None:
+                       episode: MultiAgentEpisode, **kwargs) -> None:
         """Runs when an episode is done.
 
         Args:
@@ -120,8 +106,6 @@ def on_episode_end(self,
                 state. You can use the `episode.user_data` dict to store
                 temporary data, and `episode.custom_metrics` to store custom
                 metrics for the episode.
-            env_index (EnvID): Obsoleted: The ID of the environment, which the
-                episode belongs to.
             kwargs: Forward compatibility placeholder.
         """
 
@@ -201,12 +185,13 @@ def on_learn_on_batch(self, *, policy: Policy, train_batch: SampleBatch,
 
         pass
 
-    def on_train_result(self, *, trainer, result: dict, **kwargs) -> None:
+    def on_train_result(self, *, trainer: "Trainer", result: dict,
+                        **kwargs) -> None:
         """Called at the end of Trainable.train().
 
         Args:
-            trainer (Trainer): Current trainer instance.
-            result (dict): Dict of results returned from trainer.train() call.
+            trainer: Current trainer instance.
+            result: Dict of results returned from trainer.train() call.
                 You can mutate this object to add additional metrics.
             kwargs: Forward compatibility placeholder.
         """