From f3706c5eebc68ce6283191d52dfd71abd9e14c12 Mon Sep 17 00:00:00 2001
From: 1Asan <99461435+1Asan@users.noreply.github.com>
Date: Tue, 7 Mar 2023 02:19:33 +0800
Subject: [PATCH] chore: fix typo. (#134)

---
 omnisafe/algorithms/on_policy/base/natural_pg.py       | 2 +-
 omnisafe/algorithms/on_policy/base/policy_gradient.py  | 6 +++---
 omnisafe/algorithms/on_policy/base/trpo.py             | 2 +-
 omnisafe/algorithms/on_policy/first_order/cup.py       | 6 +++---
 omnisafe/algorithms/on_policy/first_order/focops.py    | 4 ++--
 omnisafe/algorithms/on_policy/second_order/cpo.py      | 2 +-
 omnisafe/algorithms/on_policy/second_order/pcpo.py     | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/omnisafe/algorithms/on_policy/base/natural_pg.py b/omnisafe/algorithms/on_policy/base/natural_pg.py
index cf33dca82..a98b206ba 100644
--- a/omnisafe/algorithms/on_policy/base/natural_pg.py
+++ b/omnisafe/algorithms/on_policy/base/natural_pg.py
@@ -116,7 +116,7 @@ def _update_actor(  # pylint: disable=too-many-arguments, too-many-locals
 
         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
diff --git a/omnisafe/algorithms/on_policy/base/policy_gradient.py b/omnisafe/algorithms/on_policy/base/policy_gradient.py
index cd194e35a..acaa7c814 100644
--- a/omnisafe/algorithms/on_policy/base/policy_gradient.py
+++ b/omnisafe/algorithms/on_policy/base/policy_gradient.py
@@ -314,7 +314,7 @@ def _update_actor(  # pylint: disable=too-many-arguments
         self._actor_critic.actor_optimizer.step()
         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
@@ -338,6 +338,6 @@ def _loss_pi(
         std = self._actor_critic.actor.std
         ratio = torch.exp(logp_ - logp)
         loss = -(ratio * adv).mean()
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
         return loss, info
diff --git a/omnisafe/algorithms/on_policy/base/trpo.py b/omnisafe/algorithms/on_policy/base/trpo.py
index 6285954ce..b7dcef6df 100644
--- a/omnisafe/algorithms/on_policy/base/trpo.py
+++ b/omnisafe/algorithms/on_policy/base/trpo.py
@@ -192,7 +192,7 @@ def _update_actor(  # pylint: disable=too-many-arguments,too-many-locals
 
         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
diff --git a/omnisafe/algorithms/on_policy/first_order/cup.py b/omnisafe/algorithms/on_policy/first_order/cup.py
index 2f3bf2aa4..85adc53cc 100644
--- a/omnisafe/algorithms/on_policy/first_order/cup.py
+++ b/omnisafe/algorithms/on_policy/first_order/cup.py
@@ -103,8 +103,8 @@ def _loss_pi_cost(self, obs, act, logp, adv_c):
             self._max_ratio = temp_max
         if temp_min < self._min_ratio:
             self._min_ratio = temp_min
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
 
         self._logger.store(**{'Loss/Loss_pi_c': loss.item()})
 
@@ -175,7 +175,7 @@ def _update(self) -> None:
                 'Train/MaxRatio': self._max_ratio,
                 'Train/MinRatio': self._min_ratio,
                 'Train/SecondStepStopIter': i + 1,
-                'Train/SecondStepEntropy': info['entrophy'],
+                'Train/SecondStepEntropy': info['entropy'],
                 'Train/SecondStepPolicyRatio': info['ratio'],
             }
         )
diff --git a/omnisafe/algorithms/on_policy/first_order/focops.py b/omnisafe/algorithms/on_policy/first_order/focops.py
index 7006a484e..3a4185dea 100644
--- a/omnisafe/algorithms/on_policy/first_order/focops.py
+++ b/omnisafe/algorithms/on_policy/first_order/focops.py
@@ -64,8 +64,8 @@ def _loss_pi(
         loss = loss.mean()
         loss -= self._cfgs.algo_cfgs.entropy_coef * distribution.entropy().mean()
 
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
         return loss, info
 
     def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
diff --git a/omnisafe/algorithms/on_policy/second_order/cpo.py b/omnisafe/algorithms/on_policy/second_order/cpo.py
index d02dcac20..c8ecb3250 100644
--- a/omnisafe/algorithms/on_policy/second_order/cpo.py
+++ b/omnisafe/algorithms/on_policy/second_order/cpo.py
@@ -347,7 +347,7 @@ def f_b(lam):
         self._logger.store(
             **{
                 'Loss/Loss_pi': loss.item(),
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Misc/AcceptanceStep': accept_step,
diff --git a/omnisafe/algorithms/on_policy/second_order/pcpo.py b/omnisafe/algorithms/on_policy/second_order/pcpo.py
index 8642e1d4f..e270dfa6f 100644
--- a/omnisafe/algorithms/on_policy/second_order/pcpo.py
+++ b/omnisafe/algorithms/on_policy/second_order/pcpo.py
@@ -135,7 +135,7 @@ def _update_actor(
         self._logger.store(
             **{
                 'Loss/Loss_pi': loss.item(),
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Misc/AcceptanceStep': accept_step,