[RLlib] Fix type hints for original_batches in callbacks. #24214

Merged
6 changes: 3 additions & 3 deletions rllib/agents/callbacks.py
@@ -1,7 +1,7 @@
import numpy as np
import os
import tracemalloc
-from typing import Dict, Optional, TYPE_CHECKING
+from typing import Dict, Optional, Tuple, TYPE_CHECKING

from ray.rllib.env.base_env import BaseEnv
from ray.rllib.env.env_context import EnvContext
@@ -196,7 +196,7 @@ def on_postprocess_trajectory(
policy_id: PolicyID,
policies: Dict[PolicyID, Policy],
postprocessed_batch: SampleBatch,
-original_batches: Dict[AgentID, SampleBatch],
+original_batches: Dict[AgentID, Tuple[Policy, SampleBatch]],
Member:

what, that's surprising ...
how can we mix these 2 things together?

Contributor (Author):

pre_batches = {}
for (eps_id, agent_id), collector in self.agent_collectors.items():
    # Build only if there is data and agent is part of given episode.
    if collector.agent_steps == 0 or eps_id != episode_id:
        continue
    pid = self.agent_key_to_policy_id[(eps_id, agent_id)]
    policy = self.policy_map[pid]
    pre_batch = collector.build(policy.view_requirements)
    pre_batches[agent_id] = (policy, pre_batch)

pre_batches is a dict whose values have the type Tuple[Policy, SampleBatch]. It is then passed to on_postprocess_trajectory under the name original_batches.

for agent_id, post_batch in sorted(post_batches.items()):
    agent_key = (episode_id, agent_id)
    pid = self.agent_key_to_policy_id[agent_key]
    policy = self.policy_map[pid]
    self.callbacks.on_postprocess_trajectory(
        worker=get_global_worker(),
        episode=episode,
        agent_id=agent_id,
        policy_id=pid,
        policies=self.policy_map,
        postprocessed_batch=post_batch,
        original_batches=pre_batches,
    )

Member:

oh it's Tuple, not Union, I got scared.
Thanks!

Contributor:

:) @gjoliver

I think there are only very few places in RLlib where we mix different types, e.g. in a return value (for example in the sampler code's _process_observations()), and no, we probably shouldn't do this.
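
For illustration only (not part of this PR): a minimal sketch of a user-defined callback that relies on the corrected hint and unpacks the per-agent (Policy, SampleBatch) tuples. The class name and the print format are hypothetical; the signature follows the one shown in this diff.

from typing import Dict, Tuple

from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.policy.policy import Policy
from ray.rllib.policy.sample_batch import SampleBatch


class TupleAwareCallbacks(DefaultCallbacks):
    def on_postprocess_trajectory(
        self,
        *,
        worker,
        episode,
        agent_id,
        policy_id,
        policies,
        postprocessed_batch: SampleBatch,
        original_batches: Dict[str, Tuple[Policy, SampleBatch]],
        **kwargs,
    ) -> None:
        # Each value is a (Policy, SampleBatch) tuple, not a bare SampleBatch.
        orig_policy, orig_batch = original_batches[agent_id]
        print(
            "agent {}: {} original steps, {} postprocessed steps".format(
                agent_id, orig_batch.count, postprocessed_batch.count
            )
        )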

**kwargs,
) -> None:
"""Called immediately after a policy's postprocess_fn is called.
@@ -470,7 +470,7 @@ def on_postprocess_trajectory(
policy_id: PolicyID,
policies: Dict[PolicyID, Policy],
postprocessed_batch: SampleBatch,
-original_batches: Dict[AgentID, SampleBatch],
+original_batches: Dict[AgentID, Tuple[Policy, SampleBatch]],
**kwargs,
) -> None:
for callback in self._callback_list:
4 changes: 2 additions & 2 deletions rllib/examples/custom_metrics_and_callbacks.py
@@ -4,7 +4,7 @@
custom metric.
"""

-from typing import Dict
+from typing import Dict, Tuple
import argparse
import numpy as np
import os
@@ -129,7 +129,7 @@ def on_postprocess_trajectory(
policy_id: str,
policies: Dict[str, Policy],
postprocessed_batch: SampleBatch,
-original_batches: Dict[str, SampleBatch],
+original_batches: Dict[str, Tuple[Policy, SampleBatch]],
**kwargs
):
print("postprocessed {} steps".format(postprocessed_batch.count))
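
For context (not part of the diff): in the RLlib version this PR targets, a callbacks class like the one in this example is registered through the trainer config's "callbacks" key and passed as a class, not an instance. A minimal sketch; the algorithm, environment, and stopping criterion below are placeholder choices, and MyCallbacks is only a stand-in for the callbacks class defined in the example file.

import ray
from ray import tune
from ray.rllib.agents.callbacks import DefaultCallbacks


class MyCallbacks(DefaultCallbacks):
    """Stand-in for the callbacks class from the example file."""
    pass


ray.init()
tune.run(
    "PG",
    config={
        "env": "CartPole-v0",
        "callbacks": MyCallbacks,  # pass the class, not an instance
        "num_workers": 0,
    },
    stop={"training_iteration": 1},
)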