chore: fix typo. (PKU-Alignment#134)
1Asan authored and zmsn-2077 committed Mar 14, 2023
1 parent cb227a6 commit f3706c5
Showing 7 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/base/natural_pg.py
@@ -116,7 +116,7 @@ def _update_actor( # pylint: disable=too-many-arguments, too-many-locals

         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
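For readers unfamiliar with the `self._logger.store(**{...})` pattern repeated throughout these hunks: dictionary unpacking is required here because metric names such as 'Train/Entropy' contain characters that are not valid Python keyword identifiers. A toy sketch of the idea (this ToyLogger is a hypothetical stand-in, not OmniSafe's actual Logger class):

```python
class ToyLogger:
    """Minimal stand-in illustrating the store(**kwargs) logging pattern."""

    def __init__(self) -> None:
        self.data: dict[str, list[float]] = {}

    def store(self, **kwargs: float) -> None:
        # Each keyword becomes a metric key; values accumulate until flushed.
        for key, value in kwargs.items():
            self.data.setdefault(key, []).append(value)


logger = ToyLogger()
# Keys with '/' can only be passed via **{...} unpacking, not as bare keywords.
logger.store(**{'Train/Entropy': 0.42, 'Train/PolicyRatio': 1.01})
print(logger.data)
```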
6 changes: 3 additions & 3 deletions omnisafe/algorithms/on_policy/base/policy_gradient.py
@@ -314,7 +314,7 @@ def _update_actor( # pylint: disable=too-many-arguments
         self._actor_critic.actor_optimizer.step()
         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
@@ -338,6 +338,6 @@ def _loss_pi(
         std = self._actor_critic.actor.std
         ratio = torch.exp(logp_ - logp)
         loss = -(ratio * adv).mean()
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
         return loss, info
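The renamed value is the mean entropy of the current action distribution, computed alongside the importance ratio in `_loss_pi`. A minimal self-contained sketch of the same computation, with placeholder tensors standing in for OmniSafe's real inputs:

```python
import torch
from torch.distributions import Normal

# Placeholder batch: shapes and values are illustrative only.
batch, act_dim = 64, 2
distribution = Normal(torch.zeros(batch, act_dim), torch.full((act_dim,), 0.5))

act = distribution.sample()
logp = distribution.log_prob(act).sum(dim=-1)   # log-probs under the old policy
logp_ = logp + 0.01 * torch.randn(batch)        # log-probs under the new policy (stand-in)
adv = torch.randn(batch)                        # advantage estimates (stand-in)

ratio = torch.exp(logp_ - logp)                 # importance sampling ratio
loss = -(ratio * adv).mean()                    # policy-gradient surrogate loss
entropy = distribution.entropy().mean().item()  # the quantity behind the renamed key
info = {'entropy': entropy, 'ratio': ratio.mean().item()}
```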
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/base/trpo.py
@@ -192,7 +192,7 @@ def _update_actor( # pylint: disable=too-many-arguments,too-many-locals

         self._logger.store(
             **{
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Loss/Loss_pi': loss.mean().item(),
6 changes: 3 additions & 3 deletions omnisafe/algorithms/on_policy/first_order/cup.py
@@ -103,8 +103,8 @@ def _loss_pi_cost(self, obs, act, logp, adv_c):
             self._max_ratio = temp_max
         if temp_min < self._min_ratio:
             self._min_ratio = temp_min
-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}

         self._logger.store(**{'Loss/Loss_pi_c': loss.item()})

@@ -175,7 +175,7 @@ def _update(self) -> None:
                 'Train/MaxRatio': self._max_ratio,
                 'Train/MinRatio': self._min_ratio,
                 'Train/SecondStepStopIter': i + 1,
-                'Train/SecondStepEntropy': info['entrophy'],
+                'Train/SecondStepEntropy': info['entropy'],
                 'Train/SecondStepPolicyRatio': info['ratio'],
             }
         )
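The CUP hunks also track the running extremes of the policy ratio ('Train/MaxRatio' / 'Train/MinRatio') as a divergence diagnostic. A simplified sketch of that bookkeeping, with synthetic ratios standing in for the real ones:

```python
import torch

max_ratio, min_ratio = float('-inf'), float('inf')

for _ in range(10):  # stand-in for the inner policy-update loop
    ratio = torch.exp(0.05 * torch.randn(64))  # placeholder ratios near 1.0
    temp_max, temp_min = ratio.max().item(), ratio.min().item()
    # Keep the most extreme ratios seen across all update steps.
    if temp_max > max_ratio:
        max_ratio = temp_max
    if temp_min < min_ratio:
        min_ratio = temp_min

print(f'Train/MaxRatio={max_ratio:.3f}, Train/MinRatio={min_ratio:.3f}')
```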
4 changes: 2 additions & 2 deletions omnisafe/algorithms/on_policy/first_order/focops.py
@@ -64,8 +64,8 @@ def _loss_pi(
         loss = loss.mean()
         loss -= self._cfgs.algo_cfgs.entropy_coef * distribution.entropy().mean()

-        entrophy = distribution.entropy().mean().item()
-        info = {'entrophy': entrophy, 'ratio': ratio.mean().item(), 'std': std}
+        entropy = distribution.entropy().mean().item()
+        info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}
         return loss, info

     def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
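The FOCOPS hunk also shows an entropy bonus: the loss is reduced by `entropy_coef` times the mean entropy, which discourages premature policy collapse. A minimal sketch of that term, using assumed placeholder values rather than OmniSafe's config:

```python
import torch
from torch.distributions import Normal

entropy_coef = 0.01  # placeholder coefficient, not OmniSafe's config default

distribution = Normal(torch.zeros(64, 2), torch.full((2,), 0.5))
ratio = torch.exp(0.01 * torch.randn(64))  # placeholder importance ratios
adv = torch.randn(64)                      # placeholder advantages

loss = -(ratio * adv).mean()
# Entropy bonus: subtracting coef * mean entropy rewards more stochastic policies.
loss = loss - entropy_coef * distribution.entropy().mean()
```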
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/second_order/cpo.py
@@ -347,7 +347,7 @@ def f_b(lam):
         self._logger.store(
             **{
                 'Loss/Loss_pi': loss.item(),
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Misc/AcceptanceStep': accept_step,
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/second_order/pcpo.py
@@ -135,7 +135,7 @@ def _update_actor(
         self._logger.store(
             **{
                 'Loss/Loss_pi': loss.item(),
-                'Train/Entropy': info['entrophy'],
+                'Train/Entropy': info['entropy'],
                 'Train/PolicyRatio': info['ratio'],
                 'Train/PolicyStd': info['std'],
                 'Misc/AcceptanceStep': accept_step,
