refactor(common/logger): refactor and simplify logger storage logic #216

Merged 1 commit on Apr 17, 2023
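Every call site in this diff drops the `**` unpacking and passes the metrics dict to `logger.store(...)` positionally. Below is a minimal sketch of what dict-based storage could look like; it is an assumption inferred from the call sites in this PR, not the actual `omnisafe` `Logger`, which does considerably more (windowed statistics, console and file output, etc.). `MiniLogger` is a hypothetical name used only for illustration.

```python
# Hypothetical sketch of dict-based store()/get_stats(), inferred from the
# call sites in this diff. The real omnisafe Logger is not reproduced here.
from __future__ import annotations

from collections import defaultdict
from statistics import mean


class MiniLogger:
    """Accumulate scalar metrics under string keys, one running list per key."""

    def __init__(self) -> None:
        self._data: dict[str, list[float]] = defaultdict(list)

    def store(self, data: dict[str, float]) -> None:
        # New-style call: logger.store({'Metrics/EpRet': ep_ret}) -- no ** unpacking.
        for key, value in data.items():
            self._data[key].append(float(value))

    def get_stats(self, key: str) -> tuple[float, ...]:
        # Mirrors usages such as self._logger.get_stats('Metrics/EpCost')[0] below.
        values = self._data[key]
        return (mean(values),) if values else (0.0,)


logger = MiniLogger()
logger.store({'Metrics/EpRet': 1.5, 'Metrics/EpCost': 0.2})
logger.store({'Metrics/EpRet': 2.0, 'Metrics/EpCost': 0.0})
print(logger.get_stats('Metrics/EpRet'))  # (1.75,)
```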
4 changes: 2 additions & 2 deletions omnisafe/adapter/offpolicy_adapter.py
@@ -111,7 +111,7 @@ def eval_policy( # pylint: disable=too-many-locals
done = terminated or truncated
if done:
logger.store(
- **{
+ {
'Metrics/TestEpRet': ep_ret,
'Metrics/TestEpCost': ep_cost,
'Metrics/TestEpLen': ep_len,
@@ -197,7 +197,7 @@ def _log_metrics(self, logger: Logger, idx: int) -> None:
idx (int): The index of the environment.
"""
logger.store(
- **{
+ {
'Metrics/EpRet': self._ep_ret[idx],
'Metrics/EpCost': self._ep_cost[idx],
'Metrics/EpLen': self._ep_len[idx],
6 changes: 3 additions & 3 deletions omnisafe/adapter/onpolicy_adapter.py
@@ -96,8 +96,8 @@ def roll_out( # pylint: disable=too-many-locals
self._log_value(reward=reward, cost=cost, info=info)

if self._cfgs.algo_cfgs.use_cost:
- logger.store(**{'Value/cost': value_c})
- logger.store(**{'Value/reward': value_r})
+ logger.store({'Value/cost': value_c})
+ logger.store({'Value/reward': value_r})

buffer.store(
obs=obs,
@@ -169,7 +169,7 @@ def _log_metrics(self, logger: Logger, idx: int) -> None:
idx (int): The index of the environment.
"""
logger.store(
- **{
+ {
'Metrics/EpRet': self._ep_ret[idx],
'Metrics/EpCost': self._ep_cost[idx],
'Metrics/EpLen': self._ep_len[idx],
2 changes: 1 addition & 1 deletion omnisafe/adapter/saute_adapter.py
@@ -125,4 +125,4 @@ def _reset_log(self, idx: int | None = None) -> None:

def _log_metrics(self, logger: Logger, idx: int) -> None:
super()._log_metrics(logger, idx)
- logger.store(**{'Metrics/EpBudget': self._ep_budget[idx]})
+ logger.store({'Metrics/EpBudget': self._ep_budget[idx]})
16 changes: 8 additions & 8 deletions omnisafe/algorithms/off_policy/ddpg.py
@@ -192,8 +192,8 @@ def learn(self) -> tuple[int | float, ...]:
logger=self._logger,
)

- self._logger.store(**{'Time/Update': update_time})
- self._logger.store(**{'Time/Rollout': roll_out_time})
+ self._logger.store({'Time/Update': update_time})
+ self._logger.store({'Time/Rollout': roll_out_time})

if (
step > self._cfgs.algo_cfgs.start_learning_steps
@@ -202,7 +202,7 @@ def learn(self) -> tuple[int | float, ...]:
self._actor_critic.actor_scheduler.step()

self._logger.store(
- **{
+ {
'TotalEnvSteps': step + 1,
'Time/FPS': self._cfgs.algo_cfgs.steps_per_epoch / (time.time() - epoch_time),
'Time/Total': (time.time() - start_time),
@@ -265,7 +265,7 @@ def _update_reward_critic(
for param in self._actor_critic.reward_critic.parameters():
loss += param.pow(2).sum() * self._cfgs.algo_cfgs.critic_norm_coeff
self._logger.store(
- **{
+ {
'Loss/Loss_reward_critic': loss.mean().item(),
'Value/reward_critic': q_value_r.mean().item(),
},
@@ -312,7 +312,7 @@ def _update_cost_critic(
self._actor_critic.cost_critic_optimizer.step()

self._logger.store(
- **{
+ {
'Loss/Loss_cost_critic': loss.mean().item(),
'Value/cost_critic': q_value_c.mean().item(),
},
@@ -332,7 +332,7 @@ def _update_actor( # pylint: disable=too-many-arguments
)
self._actor_critic.actor_optimizer.step()
self._logger.store(
- **{
+ {
'Loss/Loss_pi': loss.mean().item(),
},
)
@@ -346,15 +346,15 @@ def _loss_pi(

def _log_when_not_update(self) -> None:
self._logger.store(
- **{
+ {
'Loss/Loss_reward_critic': 0.0,
'Loss/Loss_pi': 0.0,
'Value/reward_critic': 0.0,
},
)
if self._cfgs.algo_cfgs.use_cost:
self._logger.store(
- **{
+ {
'Loss/Loss_cost_critic': 0.0,
'Value/cost_critic': 0.0,
},
4 changes: 2 additions & 2 deletions omnisafe/algorithms/off_policy/ddpg_lag.py
@@ -48,7 +48,7 @@ def _update(self) -> None:
Jc = self._logger.get_stats('Metrics/EpCost')[0]
self._lagrange.update_lagrange_multiplier(Jc)
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
@@ -68,7 +68,7 @@ def _loss_pi(
def _log_when_not_update(self) -> None:
super()._log_when_not_update()
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
10 changes: 5 additions & 5 deletions omnisafe/algorithms/off_policy/sac.py
@@ -117,7 +117,7 @@ def _update_reward_critic(
distributed.avg_grads(self._actor_critic.reward_critic)
self._actor_critic.reward_critic_optimizer.step()
self._logger.store(
- **{
+ {
'Loss/Loss_reward_critic': loss.mean().item(),
'Value/reward_critic': q1_value_r.mean().item(),
},
@@ -139,12 +139,12 @@ def _update_actor(
alpha_loss.backward()
self._alpha_optimizer.step()
self._logger.store(
- **{
+ {
'Loss/alpha_loss': alpha_loss.mean().item(),
},
)
self._logger.store(
- **{
+ {
'Value/alpha': self._alpha,
},
)
@@ -161,13 +161,13 @@ def _loss_pi(
def _log_when_not_update(self) -> None:
super()._log_when_not_update()
self._logger.store(
- **{
+ {
'Value/alpha': self._alpha,
},
)
if self._cfgs.algo_cfgs.auto_alpha:
self._logger.store(
- **{
+ {
'Loss/alpha_loss': 0.0,
},
)
4 changes: 2 additions & 2 deletions omnisafe/algorithms/off_policy/sac_lag.py
@@ -46,7 +46,7 @@ def _update(self) -> None:
Jc = self._logger.get_stats('Metrics/EpCost')[0]
self._lagrange.update_lagrange_multiplier(Jc)
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
@@ -67,7 +67,7 @@ def _loss_pi(
def _log_when_not_update(self) -> None:
super()._log_when_not_update()
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
2 changes: 1 addition & 1 deletion omnisafe/algorithms/off_policy/td3.py
@@ -103,7 +103,7 @@ def _update_reward_critic(
distributed.avg_grads(self._actor_critic.reward_critic)
self._actor_critic.reward_critic_optimizer.step()
self._logger.store(
- **{
+ {
'Loss/Loss_reward_critic': loss.mean().item(),
'Value/reward_critic': q1_value_r.mean().item(),
},
4 changes: 2 additions & 2 deletions omnisafe/algorithms/off_policy/td3_lag.py
@@ -46,7 +46,7 @@ def _update(self) -> None:
Jc = self._logger.get_stats('Metrics/EpCost')[0]
self._lagrange.update_lagrange_multiplier(Jc)
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
@@ -65,7 +65,7 @@ def _loss_pi(
def _log_when_not_update(self) -> None:
super()._log_when_not_update()
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
6 changes: 3 additions & 3 deletions omnisafe/algorithms/on_policy/base/natural_pg.py
@@ -109,7 +109,7 @@ def _fvp(self, params: torch.Tensor) -> torch.Tensor:
distributed.avg_tensor(flat_grad_grad_kl)

self._logger.store(
- **{
+ {
'Train/KL': kl.item(),
},
)
@@ -164,7 +164,7 @@ def _update_actor( # pylint: disable=too-many-arguments,too-many-locals
loss, info = self._loss_pi(obs, act, logp, adv)

self._logger.store(
- **{
+ {
'Train/Entropy': info['entropy'],
'Train/PolicyRatio': info['ratio'],
'Train/PolicyStd': info['std'],
@@ -225,7 +225,7 @@ def _update(self) -> None:
self._update_cost_critic(obs, target_value_c)

self._logger.store(
- **{
+ {
'Train/StopIter': self._cfgs.algo_cfgs.update_iters,
'Value/Adv': adv_r.mean().item(),
},
14 changes: 7 additions & 7 deletions omnisafe/algorithms/on_policy/base/policy_gradient.py
@@ -256,11 +256,11 @@ def learn(self) -> tuple[int | float, ...]:
buffer=self._buf,
logger=self._logger,
)
- self._logger.store(**{'Time/Rollout': time.time() - roll_out_time})
+ self._logger.store({'Time/Rollout': time.time() - roll_out_time})

update_time = time.time()
self._update()
- self._logger.store(**{'Time/Update': time.time() - update_time})
+ self._logger.store({'Time/Update': time.time() - update_time})

if self._cfgs.model_cfgs.exploration_noise_anneal:
self._actor_critic.annealing(epoch)
@@ -269,7 +269,7 @@ def learn(self) -> tuple[int | float, ...]:
self._actor_critic.actor_scheduler.step()

self._logger.store(
- **{
+ {
'TotalEnvSteps': (epoch + 1) * self._cfgs.algo_cfgs.steps_per_epoch,
'Time/FPS': self._cfgs.algo_cfgs.steps_per_epoch / (time.time() - epoch_time),
'Time/Total': (time.time() - start_time),
@@ -390,7 +390,7 @@ def _update(self) -> None:
break

self._logger.store(
- **{
+ {
'Train/StopIter': update_counts, # pylint: disable=undefined-loop-variable
'Value/Adv': adv_r.mean().item(),
'Train/KL': final_kl,
@@ -434,7 +434,7 @@ def _update_reward_critic(self, obs: torch.Tensor, target_value_r: torch.Tensor)
distributed.avg_grads(self._actor_critic.reward_critic)
self._actor_critic.reward_critic_optimizer.step()

- self._logger.store(**{'Loss/Loss_reward_critic': loss.mean().item()})
+ self._logger.store({'Loss/Loss_reward_critic': loss.mean().item()})

def _update_cost_critic(self, obs: torch.Tensor, target_value_c: torch.Tensor) -> None:
r"""Update value network under a double for loop.
@@ -473,7 +473,7 @@ def _update_cost_critic(self, obs: torch.Tensor, target_value_c: torch.Tensor) -> None:
distributed.avg_grads(self._actor_critic.cost_critic)
self._actor_critic.cost_critic_optimizer.step()

- self._logger.store(**{'Loss/Loss_cost_critic': loss.mean().item()})
+ self._logger.store({'Loss/Loss_cost_critic': loss.mean().item()})

def _update_actor( # pylint: disable=too-many-arguments
self,
@@ -515,7 +515,7 @@ def _update_actor( # pylint: disable=too-many-arguments
distributed.avg_grads(self._actor_critic.actor)
self._actor_critic.actor_optimizer.step()
self._logger.store(
- **{
+ {
'Train/Entropy': info['entropy'],
'Train/PolicyRatio': info['ratio'],
'Train/PolicyStd': info['std'],
4 changes: 2 additions & 2 deletions omnisafe/algorithms/on_policy/base/trpo.py
@@ -128,7 +128,7 @@ def _search_step_size(
set_param_values_to_model(self._actor_critic.actor, theta_old)

self._logger.store(
- **{
+ {
'Train/KL': final_kl,
},
)
@@ -199,7 +199,7 @@ def _update_actor( # pylint: disable=too-many-arguments,too-many-locals
loss, info = self._loss_pi(obs, act, logp, adv)

self._logger.store(
- **{
+ {
'Train/Entropy': info['entropy'],
'Train/PolicyRatio': info['ratio'],
'Train/PolicyStd': info['std'],
4 changes: 2 additions & 2 deletions omnisafe/algorithms/on_policy/first_order/cup.py
@@ -117,7 +117,7 @@ def _loss_pi_cost(self, obs, act, logp, adv_c):
entropy = distribution.entropy().mean().item()
info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}

- self._logger.store(**{'Loss/Loss_pi_c': loss.item()})
+ self._logger.store({'Loss/Loss_pi_c': loss.item()})

return loss, info

@@ -195,7 +195,7 @@ def _update(self) -> None:
break

self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.item(),
'Train/SecondStepStopIter': i + 1, # pylint: disable=undefined-loop-variable
'Train/SecondStepEntropy': info['entropy'],
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/first_order/focops.py
@@ -219,7 +219,7 @@ def _update(self) -> None:
break

self._logger.store(
- **{
+ {
'Train/StopIter': i + 1, # pylint: disable=undefined-loop-variable
'Value/Adv': adv_r.mean().item(),
'Train/KL': kl,
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/naive_lagrange/pdo.py
@@ -60,7 +60,7 @@ def _update(self) -> None:
# then update the policy and value function
super()._update()

- self._logger.store(**{'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})
+ self._logger.store({'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})

def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
penalty = self._lagrange.lagrangian_multiplier.item()
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/naive_lagrange/ppo_lag.py
@@ -78,7 +78,7 @@ def _update(self) -> None:
# then update the policy and value function
super()._update()

- self._logger.store(**{'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})
+ self._logger.store({'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})

def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
r"""Compute surrogate loss.
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/naive_lagrange/rcpo.py
@@ -63,7 +63,7 @@ def _update(self) -> None:
# then update the policy and value function
super()._update()

- self._logger.store(**{'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})
+ self._logger.store({'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})

def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
penalty = self._lagrange.lagrangian_multiplier.item()
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/naive_lagrange/trpo_lag.py
@@ -76,7 +76,7 @@ def _update(self) -> None:
# then update the policy and value function
super()._update()

- self._logger.store(**{'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})
+ self._logger.store({'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})

def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
r"""Compute surrogate loss.
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/penalty_function/ipo.py
@@ -67,6 +67,6 @@ def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
if penalty < 0 or penalty > self._cfgs.algo_cfgs.penalty_max:
penalty = self._cfgs.algo_cfgs.penalty_max

- self._logger.store(**{'Misc/Penalty': penalty})
+ self._logger.store({'Misc/Penalty': penalty})

return (adv_r - penalty * adv_c) / (1 + penalty)
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/penalty_function/p3o.py
@@ -129,7 +129,7 @@ def _update_actor(
self._actor_critic.actor_optimizer.step()

self._logger.store(
- **{
+ {
'Train/Entropy': info['entropy'],
'Train/PolicyRatio': info['ratio'],
'Train/PolicyStd': info['std'],
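Taken together, the change is the same across all twenty files: the metrics dict that was previously unpacked into keyword arguments is now passed to `store` directly. A before/after illustration using the hypothetical `MiniLogger` sketch above (the old-style call is shown commented out, since a positional-only `store` would reject keyword arguments):

```python
logger = MiniLogger()   # stand-in for the real omnisafe Logger (see sketch above)
value_r = 0.73          # example scalar value

# Old call style removed by this PR:
#     logger.store(**{'Value/reward': value_r})

# New call style introduced by this PR:
logger.store({'Value/reward': value_r})
```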