diff --git a/omnisafe/algorithms/on_policy/base/natural_pg.py b/omnisafe/algorithms/on_policy/base/natural_pg.py
index cf33dca82..a98b206ba 100644
--- a/omnisafe/algorithms/on_policy/base/natural_pg.py
+++ b/omnisafe/algorithms/on_policy/base/natural_pg.py
@@ -116,7 +116,7 @@ def _update_actor(  # pylint: disable=too-many-arguments, too-many-locals
 
         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
diff --git a/omnisafe/algorithms/on_policy/base/policy_gradient.py b/omnisafe/algorithms/on_policy/base/policy_gradient.py
index cd194e35a..acaa7c814 100644
--- a/omnisafe/algorithms/on_policy/base/policy_gradient.py
+++ b/omnisafe/algorithms/on_policy/base/policy_gradient.py
@@ -314,7 +314,7 @@ def _update_actor(  # pylint: disable=too-many-arguments
         self._actor_critic.actor_optimizer.step()
         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
@@ -338,6 +338,6 @@ def _loss_pi(
         std = self._actor_critic.actor.std
         ratio = torch.exp(logp_ - logp)
         loss = -(ratio * adv).mean()
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
         return loss, info
diff --git a/omnisafe/algorithms/on_policy/base/ppo.py b/omnisafe/algorithms/on_policy/base/ppo.py
index 4df3f2416..98bedd32b 100644
--- a/omnisafe/algorithms/on_policy/base/ppo.py
+++ b/omnisafe/algorithms/on_policy/base/ppo.py
@@ -63,6 +63,6 @@ def _loss_pi(
         loss = -torch.min(ratio * adv, ratio_cliped * adv).mean()
         loss += self._cfgs.algo_cfgs.entropy_coef * distribution.entropy().mean()
         # useful extra info
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
         return loss, info
diff --git a/omnisafe/algorithms/on_policy/base/trpo.py b/omnisafe/algorithms/on_policy/base/trpo.py
index 6285954ce..b7dcef6df 100644
--- a/omnisafe/algorithms/on_policy/base/trpo.py
+++ b/omnisafe/algorithms/on_policy/base/trpo.py
@@ -192,7 +192,7 @@ def _update_actor(  # pylint: disable=too-many-arguments,too-many-locals
 
         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
diff --git a/omnisafe/algorithms/on_policy/first_order/cup.py b/omnisafe/algorithms/on_policy/first_order/cup.py
index 2f3bf2aa4..85adc53cc 100644
--- a/omnisafe/algorithms/on_policy/first_order/cup.py
+++ b/omnisafe/algorithms/on_policy/first_order/cup.py
@@ -103,8 +103,8 @@ def _loss_pi_cost(self, obs, act, logp, adv_c):
             self._max_ratio = temp_max
         if temp_min < self._min_ratio:
             self._min_ratio = temp_min
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
 
         self._logger.store(**{'Loss/Loss_pi_c': loss.item()})
 
@@ -175,7 +175,7 @@ def _update(self) -> None:
                 'Train/MaxRatio': self._max_ratio,
                 'Train/MinRatio': self._min_ratio,
                 'Train/SecondStepStopIter': i + 1,
-                'Train/SecondStepEntropy': info['entrophy'],
+                'Train/SecondStepEntropy': info['entropy'],
                 'Train/SecondStepPolicyRatio': info['ratio'],
             }
         )
diff --git a/omnisafe/algorithms/on_policy/first_order/focops.py b/omnisafe/algorithms/on_policy/first_order/focops.py
index 7006a484e..3a4185dea 100644
--- a/omnisafe/algorithms/on_policy/first_order/focops.py
+++ b/omnisafe/algorithms/on_policy/first_order/focops.py
@@ -64,8 +64,8 @@ def _loss_pi(
         loss = loss.mean()
         loss -= self._cfgs.algo_cfgs.entropy_coef * distribution.entropy().mean()
 
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
         return loss, info
 
     def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
diff --git a/omnisafe/algorithms/on_policy/penalty_function/p3o.py b/omnisafe/algorithms/on_policy/penalty_function/p3o.py
index debf4c0a6..6ec64b1ae 100644
--- a/omnisafe/algorithms/on_policy/penalty_function/p3o.py
+++ b/omnisafe/algorithms/on_policy/penalty_function/p3o.py
@@ -100,7 +100,7 @@ def _update_actor(
 
         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss_reward.mean().item(),
diff --git a/omnisafe/algorithms/on_policy/second_order/cpo.py b/omnisafe/algorithms/on_policy/second_order/cpo.py
index d02dcac20..c8ecb3250 100644
--- a/omnisafe/algorithms/on_policy/second_order/cpo.py
+++ b/omnisafe/algorithms/on_policy/second_order/cpo.py
@@ -347,7 +347,7 @@ def f_b(lam):
         self._logger.store(
             **{
                 'Loss/Loss_pi': loss.item(),
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Misc/AcceptanceStep': accept_step,
diff --git a/omnisafe/algorithms/on_policy/second_order/pcpo.py b/omnisafe/algorithms/on_policy/second_order/pcpo.py
index 8642e1d4f..e270dfa6f 100644
--- a/omnisafe/algorithms/on_policy/second_order/pcpo.py
+++ b/omnisafe/algorithms/on_policy/second_order/pcpo.py
@@ -135,7 +135,7 @@ def _update_actor(
         self._logger.store(
             **{
                 'Loss/Loss_pi': loss.item(),
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Misc/AcceptanceStep': accept_step,
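
Note on why the rename touches both loss functions and update methods: `_loss_pi` (and `_loss_pi_cost` in CUP) produces the `info` dict, while `_update_actor` / `_update` reads the same keys back when storing metrics, so fixing the spelling on only one side would raise a `KeyError` at logging time. The sketch below illustrates that producer/consumer coupling; it is a standalone, simplified example (the names `loss_pi`, `store_metrics`, and the `print`-based store are stand-ins, not omnisafe's actual classes).

```python
import torch
from torch.distributions import Normal

# Simplified stand-ins for the _loss_pi / _update_actor logging path.
# The key written here ('entropy') must match the key read below.

def loss_pi(distribution: Normal, logp: torch.Tensor, logp_: torch.Tensor, adv: torch.Tensor):
    """Producer side: surrogate loss plus the extra info dict."""
    ratio = torch.exp(logp_ - logp)
    loss = -(ratio * adv).mean()
    entropy = distribution.entropy().mean().item()
    std = distribution.stddev.mean().item()
    info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
    return loss, info


def store_metrics(store, loss: torch.Tensor, info: dict) -> None:
    """Consumer side: looks up info['entropy'], so the producer must use that key."""
    store(
        **{
            'Train/Entropy': info['entropy'],
            'Train/PolicyRatio': info['ratio'],
            'Train/PolicyStd': info['std'],
            'Loss/Loss_pi': loss.mean().item(),
        },
    )


if __name__ == '__main__':
    dist = Normal(torch.zeros(8), torch.ones(8))
    act = dist.sample()
    logp = dist.log_prob(act)
    loss, info = loss_pi(dist, logp, logp.detach(), torch.randn(8))
    store_metrics(lambda **metrics: print(metrics), loss, info)
```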