[RLlib] Fixed a bug with kl divergence calculation of torch.Dirichlet distribution within RLlib (#34209)

Signed-off-by: Kourosh Hakhamaneshi <[email protected]>
kouroshHakha authored Apr 11, 2023
1 parent f3bd6c0 commit 4168b9b
6 changes: 1 addition & 5 deletions rllib/models/torch/torch_action_dist.py
@@ -622,7 +622,7 @@ def __init__(self, inputs, model):
     @override(ActionDistribution)
     def deterministic_sample(self) -> TensorType:
-        self.last_sample = nn.functional.softmax(self.dist.concentration)
+        self.last_sample = nn.functional.softmax(self.dist.concentration, dim=-1)
         return self.last_sample
 
     @override(ActionDistribution)
@@ -638,10 +638,6 @@ def logp(self, x):
     def entropy(self):
         return self.dist.entropy()
 
-    @override(ActionDistribution)
-    def kl(self, other):
-        return self.dist.kl_divergence(other.dist)
-
     @staticmethod
     @override(ActionDistribution)
     def required_model_output_shape(action_space, model_config):
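For context, a minimal sketch (not code from this commit) of the two behaviors the change touches, using plain PyTorch: softmax over the Dirichlet concentration parameters with an explicit dim=-1, and KL divergence between two Dirichlet distributions via torch.distributions.kl.kl_divergence. The override removed above called self.dist.kl_divergence(other.dist), but torch.distributions.Dirichlet has no such instance method; with the override gone, the KL presumably falls back to the generic wrapper-level kl(), which uses the registered function shown below. The tensors here are illustrative stand-ins for RLlib model outputs.

# Sketch only, not code from the commit; assumes a recent plain PyTorch.
import torch
import torch.nn as nn
from torch.distributions import Dirichlet
from torch.distributions.kl import kl_divergence

# Batched concentration parameters, standing in for what the action
# distribution would hold in self.dist.concentration.
concentration = torch.tensor([[1.0, 2.0, 3.0],
                              [4.0, 1.0, 1.0]])

# Deterministic sample: softmax over the last (per-action) axis. Omitting
# dim is deprecated and picks a dimension by a shape-based heuristic that
# is not always the last axis.
deterministic = nn.functional.softmax(concentration, dim=-1)

# KL between two Dirichlet distributions: use the registered kl_divergence
# function. Dirichlet has no kl_divergence() method, which is why the
# removed override (self.dist.kl_divergence(other.dist)) raised an error.
p = Dirichlet(concentration)
q = Dirichlet(torch.ones_like(concentration))
kl = kl_divergence(p, q)  # shape (2,): one KL value per batch row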
