Commit
Merge c430010 into d17aa77
XuehaiPan authored Apr 17, 2023
2 parents d17aa77 + c430010 commit 6e2ba34
Showing 25 changed files with 61 additions and 54 deletions.
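
Every hunk shown below applies the same change: calls of the form logger.store(**{...}) become logger.store({...}), so the metrics dictionary is passed to the logger as a single positional argument instead of being unpacked into keyword arguments. As a minimal sketch of the call style after this commit (a hypothetical Logger written only for illustration, not the actual omnisafe implementation; the parameter name data and the internal buffering are assumptions):

from collections import defaultdict
from typing import Any


class Logger:
    """Illustrative metrics logger whose store() takes a dict positionally."""

    def __init__(self) -> None:
        # Buffer every value reported under a given key until it is consumed.
        self._data: dict[str, list[Any]] = defaultdict(list)

    def store(self, data: dict[str, Any]) -> None:
        """Buffer a dictionary of named metric values."""
        for key, value in data.items():
            self._data[key].append(value)


logger = Logger()
# Call style used after this commit: the dict itself is the argument.
logger.store({'Value/reward': 0.5, 'Value/cost': 0.1})
# The removed style, logger.store(**{...}), unpacked the same dict into keyword
# arguments and therefore required a signature such as def store(self, **data).

One plausible motivation (the commit itself does not state one) is that a plain dict parameter can be type-annotated precisely and avoids round-tripping metric names such as 'Value/reward' through keyword arguments.
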
4 changes: 2 additions & 2 deletions omnisafe/adapter/offpolicy_adapter.py
@@ -111,7 +111,7 @@ def eval_policy( # pylint: disable=too-many-locals
done = terminated or truncated
if done:
logger.store(
- **{
+ {
'Metrics/TestEpRet': ep_ret,
'Metrics/TestEpCost': ep_cost,
'Metrics/TestEpLen': ep_len,
@@ -197,7 +197,7 @@ def _log_metrics(self, logger: Logger, idx: int) -> None:
idx (int): The index of the environment.
"""
logger.store(
- **{
+ {
'Metrics/EpRet': self._ep_ret[idx],
'Metrics/EpCost': self._ep_cost[idx],
'Metrics/EpLen': self._ep_len[idx],
6 changes: 3 additions & 3 deletions omnisafe/adapter/onpolicy_adapter.py
@@ -96,8 +96,8 @@ def roll_out( # pylint: disable=too-many-locals
self._log_value(reward=reward, cost=cost, info=info)

if self._cfgs.algo_cfgs.use_cost:
- logger.store(**{'Value/cost': value_c})
- logger.store(**{'Value/reward': value_r})
+ logger.store({'Value/cost': value_c})
+ logger.store({'Value/reward': value_r})

buffer.store(
obs=obs,
@@ -169,7 +169,7 @@ def _log_metrics(self, logger: Logger, idx: int) -> None:
idx (int): The index of the environment.
"""
logger.store(
- **{
+ {
'Metrics/EpRet': self._ep_ret[idx],
'Metrics/EpCost': self._ep_cost[idx],
'Metrics/EpLen': self._ep_len[idx],
2 changes: 1 addition & 1 deletion omnisafe/adapter/saute_adapter.py
@@ -125,4 +125,4 @@ def _reset_log(self, idx: int | None = None) -> None:

def _log_metrics(self, logger: Logger, idx: int) -> None:
super()._log_metrics(logger, idx)
- logger.store(**{'Metrics/EpBudget': self._ep_budget[idx]})
+ logger.store({'Metrics/EpBudget': self._ep_budget[idx]})
16 changes: 8 additions & 8 deletions omnisafe/algorithms/off_policy/ddpg.py
@@ -192,8 +192,8 @@ def learn(self) -> tuple[int | float, ...]:
logger=self._logger,
)

- self._logger.store(**{'Time/Update': update_time})
- self._logger.store(**{'Time/Rollout': roll_out_time})
+ self._logger.store({'Time/Update': update_time})
+ self._logger.store({'Time/Rollout': roll_out_time})

if (
step > self._cfgs.algo_cfgs.start_learning_steps
@@ -202,7 +202,7 @@ def learn(self) -> tuple[int | float, ...]:
self._actor_critic.actor_scheduler.step()

self._logger.store(
- **{
+ {
'TotalEnvSteps': step + 1,
'Time/FPS': self._cfgs.algo_cfgs.steps_per_epoch / (time.time() - epoch_time),
'Time/Total': (time.time() - start_time),
@@ -265,7 +265,7 @@ def _update_reward_critic(
for param in self._actor_critic.reward_critic.parameters():
loss += param.pow(2).sum() * self._cfgs.algo_cfgs.critic_norm_coeff
self._logger.store(
- **{
+ {
'Loss/Loss_reward_critic': loss.mean().item(),
'Value/reward_critic': q_value_r.mean().item(),
},
@@ -312,7 +312,7 @@ def _update_cost_critic(
self._actor_critic.cost_critic_optimizer.step()

self._logger.store(
- **{
+ {
'Loss/Loss_cost_critic': loss.mean().item(),
'Value/cost_critic': q_value_c.mean().item(),
},
@@ -332,7 +332,7 @@ def _update_actor( # pylint: disable=too-many-arguments
)
self._actor_critic.actor_optimizer.step()
self._logger.store(
- **{
+ {
'Loss/Loss_pi': loss.mean().item(),
},
)
@@ -346,15 +346,15 @@ def _loss_pi(

def _log_when_not_update(self) -> None:
self._logger.store(
- **{
+ {
'Loss/Loss_reward_critic': 0.0,
'Loss/Loss_pi': 0.0,
'Value/reward_critic': 0.0,
},
)
if self._cfgs.algo_cfgs.use_cost:
self._logger.store(
- **{
+ {
'Loss/Loss_cost_critic': 0.0,
'Value/cost_critic': 0.0,
},
4 changes: 2 additions & 2 deletions omnisafe/algorithms/off_policy/ddpg_lag.py
@@ -48,7 +48,7 @@ def _update(self) -> None:
Jc = self._logger.get_stats('Metrics/EpCost')[0]
self._lagrange.update_lagrange_multiplier(Jc)
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
@@ -68,7 +68,7 @@ def _loss_pi(
def _log_when_not_update(self) -> None:
super()._log_when_not_update()
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
10 changes: 5 additions & 5 deletions omnisafe/algorithms/off_policy/sac.py
@@ -117,7 +117,7 @@ def _update_reward_critic(
distributed.avg_grads(self._actor_critic.reward_critic)
self._actor_critic.reward_critic_optimizer.step()
self._logger.store(
- **{
+ {
'Loss/Loss_reward_critic': loss.mean().item(),
'Value/reward_critic': q1_value_r.mean().item(),
},
@@ -139,12 +139,12 @@ def _update_actor(
alpha_loss.backward()
self._alpha_optimizer.step()
self._logger.store(
- **{
+ {
'Loss/alpha_loss': alpha_loss.mean().item(),
},
)
self._logger.store(
- **{
+ {
'Value/alpha': self._alpha,
},
)
@@ -161,13 +161,13 @@ def _loss_pi(
def _log_when_not_update(self) -> None:
super()._log_when_not_update()
self._logger.store(
- **{
+ {
'Value/alpha': self._alpha,
},
)
if self._cfgs.algo_cfgs.auto_alpha:
self._logger.store(
- **{
+ {
'Loss/alpha_loss': 0.0,
},
)
4 changes: 2 additions & 2 deletions omnisafe/algorithms/off_policy/sac_lag.py
@@ -46,7 +46,7 @@ def _update(self) -> None:
Jc = self._logger.get_stats('Metrics/EpCost')[0]
self._lagrange.update_lagrange_multiplier(Jc)
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
@@ -67,7 +67,7 @@ def _loss_pi(
def _log_when_not_update(self) -> None:
super()._log_when_not_update()
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
2 changes: 1 addition & 1 deletion omnisafe/algorithms/off_policy/td3.py
@@ -103,7 +103,7 @@ def _update_reward_critic(
distributed.avg_grads(self._actor_critic.reward_critic)
self._actor_critic.reward_critic_optimizer.step()
self._logger.store(
- **{
+ {
'Loss/Loss_reward_critic': loss.mean().item(),
'Value/reward_critic': q1_value_r.mean().item(),
},
4 changes: 2 additions & 2 deletions omnisafe/algorithms/off_policy/td3_lag.py
@@ -46,7 +46,7 @@ def _update(self) -> None:
Jc = self._logger.get_stats('Metrics/EpCost')[0]
self._lagrange.update_lagrange_multiplier(Jc)
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
@@ -65,7 +65,7 @@ def _loss_pi(
def _log_when_not_update(self) -> None:
super()._log_when_not_update()
self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.data.item(),
},
)
6 changes: 3 additions & 3 deletions omnisafe/algorithms/on_policy/base/natural_pg.py
@@ -109,7 +109,7 @@ def _fvp(self, params: torch.Tensor) -> torch.Tensor:
distributed.avg_tensor(flat_grad_grad_kl)

self._logger.store(
- **{
+ {
'Train/KL': kl.item(),
},
)
@@ -164,7 +164,7 @@ def _update_actor( # pylint: disable=too-many-arguments,too-many-locals
loss, info = self._loss_pi(obs, act, logp, adv)

self._logger.store(
- **{
+ {
'Train/Entropy': info['entropy'],
'Train/PolicyRatio': info['ratio'],
'Train/PolicyStd': info['std'],
@@ -225,7 +225,7 @@ def _update(self) -> None:
self._update_cost_critic(obs, target_value_c)

self._logger.store(
- **{
+ {
'Train/StopIter': self._cfgs.algo_cfgs.update_iters,
'Value/Adv': adv_r.mean().item(),
},
14 changes: 7 additions & 7 deletions omnisafe/algorithms/on_policy/base/policy_gradient.py
@@ -256,11 +256,11 @@ def learn(self) -> tuple[int | float, ...]:
buffer=self._buf,
logger=self._logger,
)
- self._logger.store(**{'Time/Rollout': time.time() - roll_out_time})
+ self._logger.store({'Time/Rollout': time.time() - roll_out_time})

update_time = time.time()
self._update()
- self._logger.store(**{'Time/Update': time.time() - update_time})
+ self._logger.store({'Time/Update': time.time() - update_time})

if self._cfgs.model_cfgs.exploration_noise_anneal:
self._actor_critic.annealing(epoch)
@@ -269,7 +269,7 @@ def learn(self) -> tuple[int | float, ...]:
self._actor_critic.actor_scheduler.step()

self._logger.store(
- **{
+ {
'TotalEnvSteps': (epoch + 1) * self._cfgs.algo_cfgs.steps_per_epoch,
'Time/FPS': self._cfgs.algo_cfgs.steps_per_epoch / (time.time() - epoch_time),
'Time/Total': (time.time() - start_time),
@@ -390,7 +390,7 @@ def _update(self) -> None:
break

self._logger.store(
- **{
+ {
'Train/StopIter': update_counts, # pylint: disable=undefined-loop-variable
'Value/Adv': adv_r.mean().item(),
'Train/KL': final_kl,
@@ -434,7 +434,7 @@ def _update_reward_critic(self, obs: torch.Tensor, target_value_r: torch.Tensor)
distributed.avg_grads(self._actor_critic.reward_critic)
self._actor_critic.reward_critic_optimizer.step()

- self._logger.store(**{'Loss/Loss_reward_critic': loss.mean().item()})
+ self._logger.store({'Loss/Loss_reward_critic': loss.mean().item()})

def _update_cost_critic(self, obs: torch.Tensor, target_value_c: torch.Tensor) -> None:
r"""Update value network under a double for loop.
@@ -473,7 +473,7 @@ def _update_cost_critic(self, obs: torch.Tensor, target_value_c: torch.Tensor) -
distributed.avg_grads(self._actor_critic.cost_critic)
self._actor_critic.cost_critic_optimizer.step()

- self._logger.store(**{'Loss/Loss_cost_critic': loss.mean().item()})
+ self._logger.store({'Loss/Loss_cost_critic': loss.mean().item()})

def _update_actor( # pylint: disable=too-many-arguments
self,
@@ -515,7 +515,7 @@ def _update_actor( # pylint: disable=too-many-arguments
distributed.avg_grads(self._actor_critic.actor)
self._actor_critic.actor_optimizer.step()
self._logger.store(
- **{
+ {
'Train/Entropy': info['entropy'],
'Train/PolicyRatio': info['ratio'],
'Train/PolicyStd': info['std'],
4 changes: 2 additions & 2 deletions omnisafe/algorithms/on_policy/base/trpo.py
@@ -128,7 +128,7 @@ def _search_step_size(
set_param_values_to_model(self._actor_critic.actor, theta_old)

self._logger.store(
- **{
+ {
'Train/KL': final_kl,
},
)
@@ -199,7 +199,7 @@ def _update_actor( # pylint: disable=too-many-arguments,too-many-locals
loss, info = self._loss_pi(obs, act, logp, adv)

self._logger.store(
- **{
+ {
'Train/Entropy': info['entropy'],
'Train/PolicyRatio': info['ratio'],
'Train/PolicyStd': info['std'],
4 changes: 2 additions & 2 deletions omnisafe/algorithms/on_policy/first_order/cup.py
@@ -117,7 +117,7 @@ def _loss_pi_cost(self, obs, act, logp, adv_c):
entropy = distribution.entropy().mean().item()
info = {'entropy': entropy, 'ratio': ratio.mean().item(), 'std': std}

- self._logger.store(**{'Loss/Loss_pi_c': loss.item()})
+ self._logger.store({'Loss/Loss_pi_c': loss.item()})

return loss, info

@@ -195,7 +195,7 @@ def _update(self) -> None:
break

self._logger.store(
- **{
+ {
'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier.item(),
'Train/SecondStepStopIter': i + 1, # pylint: disable=undefined-loop-variable
'Train/SecondStepEntropy': info['entropy'],
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/first_order/focops.py
@@ -219,7 +219,7 @@ def _update(self) -> None:
break

self._logger.store(
- **{
+ {
'Train/StopIter': i + 1, # pylint: disable=undefined-loop-variable
'Value/Adv': adv_r.mean().item(),
'Train/KL': kl,
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/naive_lagrange/pdo.py
@@ -60,7 +60,7 @@ def _update(self) -> None:
# then update the policy and value function
super()._update()

- self._logger.store(**{'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})
+ self._logger.store({'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})

def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
penalty = self._lagrange.lagrangian_multiplier.item()
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/naive_lagrange/ppo_lag.py
@@ -78,7 +78,7 @@ def _update(self) -> None:
# then update the policy and value function
super()._update()

- self._logger.store(**{'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})
+ self._logger.store({'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})

def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
r"""Compute surrogate loss.
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/naive_lagrange/rcpo.py
@@ -63,7 +63,7 @@ def _update(self) -> None:
# then update the policy and value function
super()._update()

- self._logger.store(**{'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})
+ self._logger.store({'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})

def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
penalty = self._lagrange.lagrangian_multiplier.item()
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/naive_lagrange/trpo_lag.py
@@ -76,7 +76,7 @@ def _update(self) -> None:
# then update the policy and value function
super()._update()

- self._logger.store(**{'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})
+ self._logger.store({'Metrics/LagrangeMultiplier': self._lagrange.lagrangian_multiplier})

def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> torch.Tensor:
r"""Compute surrogate loss.
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/penalty_function/ipo.py
@@ -67,6 +67,6 @@ def _compute_adv_surrogate(self, adv_r: torch.Tensor, adv_c: torch.Tensor) -> to
if penalty < 0 or penalty > self._cfgs.algo_cfgs.penalty_max:
penalty = self._cfgs.algo_cfgs.penalty_max

- self._logger.store(**{'Misc/Penalty': penalty})
+ self._logger.store({'Misc/Penalty': penalty})

return (adv_r - penalty * adv_c) / (1 + penalty)
2 changes: 1 addition & 1 deletion omnisafe/algorithms/on_policy/penalty_function/p3o.py
@@ -129,7 +129,7 @@ def _update_actor(
self._actor_critic.actor_optimizer.step()

self._logger.store(
- **{
+ {
'Train/Entropy': info['entropy'],
'Train/PolicyRatio': info['ratio'],
'Train/PolicyStd': info['std'],
(Diffs for the remaining changed files did not load.)

0 comments on commit 6e2ba34
