
[RLlib] Fix AlphaStar for tf2+tracing; smaller cleanups around avoiding to wrap a TFPolicy as_eager() or with_tracing more than once. #24271

Merged (7 commits) on Apr 28, 2022
9 changes: 7 additions & 2 deletions rllib/policy/eager_tf_policy.py

```diff
@@ -250,6 +250,11 @@ def compute_gradients(self, loss, var_list):
         return list(zip(self.tape.gradient(loss, var_list), var_list))
 
 
+class EagerTFPolicy(Policy):
+    """Dummy class to recognize any eagerized TFPolicy by its inheritance."""
+    pass
+
+
 def build_eager_tf_policy(
     name,
     loss_fn,
@@ -309,7 +314,7 @@ def __init__(self, observation_space, action_space, config):
             if not tf1.executing_eagerly():
                 tf1.enable_eager_execution()
             self.framework = config.get("framework", "tfe")
-            Policy.__init__(self, observation_space, action_space, config)
+            EagerTFPolicy.__init__(self, observation_space, action_space, config)
 
             # Global timestep should be a tensor.
             self.global_timestep = tf.Variable(0, trainable=False, dtype=tf.int64)
@@ -594,7 +599,7 @@ def postprocess_trajectory(
         ):
             assert tf.executing_eagerly()
             # Call super's postprocess_trajectory first.
-            sample_batch = Policy.postprocess_trajectory(self, sample_batch)
+            sample_batch = EagerTFPolicy.postprocess_trajectory(self, sample_batch)
             if postprocess_fn:
                 return postprocess_fn(self, sample_batch, other_agent_batches, episode)
             return sample_batch
```
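To illustrate the point of the new marker base class, here is a minimal, self-contained sketch (stand-in classes and a hypothetical `eagerize` helper, not RLlib's real implementations) of detecting an already-eagerized policy purely by inheritance, so a class never gets wrapped with `as_eager()` twice:

```python
class Policy:
    """Stand-in for RLlib's Policy base class."""


class EagerTFPolicy(Policy):
    """Marker base: any eagerized TFPolicy inherits from this."""


class TFPolicy(Policy):
    @classmethod
    def as_eager(cls):
        # Build an eager variant that carries the marker base.
        return type("Eager" + cls.__name__, (EagerTFPolicy,), {})


def eagerize(orig_cls):
    # Only wrap classes that are not already eagerized: the
    # issubclass() check is what the marker base enables.
    if hasattr(orig_cls, "as_eager") and not issubclass(orig_cls, EagerTFPolicy):
        return orig_cls.as_eager()
    return orig_cls


once = eagerize(TFPolicy)   # gets wrapped
twice = eagerize(once)      # unchanged: already an EagerTFPolicy subclass
assert once is twice
```

Without the marker base, the wrapper would need a side flag on every generated class to know it had already run; the inheritance check keeps that state in the type itself.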
6 changes: 4 additions & 2 deletions rllib/utils/tf_utils.py

```diff
@@ -240,9 +240,11 @@ class for.
     assert tf1.executing_eagerly()
 
     from ray.rllib.policy.tf_policy import TFPolicy
+    from ray.rllib.policy.eager_tf_policy import EagerTFPolicy
 
-    # Create eager-class.
-    if hasattr(orig_cls, "as_eager"):
+    # Create eager-class (if not already one).
+    if hasattr(orig_cls, "as_eager") and \
+            not issubclass(orig_cls, EagerTFPolicy):
        cls = orig_cls.as_eager()
        if config.get("eager_tracing"):
            cls = cls.with_tracing()
```

Review comment (Member), on the line `if config.get("eager_tracing"):`:

Per our offline message, I think the real fix should actually be to pull this

```python
if config.get("eager_tracing"):
    cls = cls.with_tracing()
```

block out of the `as_eager()` block, so that we try to enable tracing regardless of whether we call `as_eager()` here or somewhere else. Just FYI.

Reply (Contributor Author):

You are right. Thanks for the catch.
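The reviewer's suggested follow-up, hoisting the tracing step out of the `as_eager()` branch, could look roughly like this. This is a simplified sketch with stand-in classes; the `_traced` guard is a hypothetical illustration of once-only wrapping, not RLlib's actual mechanism:

```python
class EagerTFPolicy:
    """Marker for already-eagerized policies (stand-in)."""


def get_eager_class(orig_cls, config):
    """Eagerize if needed, then apply tracing regardless of where
    eagerization happened, per the review suggestion above."""
    cls = orig_cls
    # Step 1: eagerize only if the class is not already eagerized.
    if hasattr(cls, "as_eager") and not issubclass(cls, EagerTFPolicy):
        cls = cls.as_eager()
    # Step 2: tracing now sits outside the as_eager() branch, guarded by
    # a hypothetical `_traced` flag so it is applied at most once.
    if config.get("eager_tracing") and not getattr(cls, "_traced", False):
        cls = cls.with_tracing()
    return cls


class DemoEagerPolicy(EagerTFPolicy):
    """Already-eager stand-in that supports tracing."""
    _traced = False

    @classmethod
    def with_tracing(cls):
        return type("Traced" + cls.__name__, (cls,), {"_traced": True})


traced = get_eager_class(DemoEagerPolicy, {"eager_tracing": True})
retraced = get_eager_class(traced, {"eager_tracing": True})
assert traced._traced and retraced is traced
```

With this shape, a policy that was eagerized elsewhere (and so skips step 1) still gets tracing enabled in step 2, which is the behavior the reviewer asked for.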