diff --git a/omnisafe/algorithms/on_policy/base/policy_gradient.py b/omnisafe/algorithms/on_policy/base/policy_gradient.py
index ae3ec1d22..cd194e35a 100644
--- a/omnisafe/algorithms/on_policy/base/policy_gradient.py
+++ b/omnisafe/algorithms/on_policy/base/policy_gradient.py
@@ -15,7 +15,7 @@
 """Implementation of the Policy Gradient algorithm."""
 
 import time
-from typing import Dict, Tuple, Union
+from typing import Any, Dict, Tuple, Union
 
 import torch
 import torch.nn as nn
@@ -98,12 +98,11 @@ def _init_log(self) -> None:
             config=self._cfgs,
         )
 
-        obs_normalizer = self._env.save()['obs_normalizer']
-        what_to_save = {
-            'pi': self._actor_critic.actor,
-            'obs_normalizer': obs_normalizer,
-        }
-
+        what_to_save: Dict[str, Any] = {}
+        what_to_save['pi'] = self._actor_critic.actor
+        if self._cfgs.algo_cfgs.obs_normalize:
+            obs_normalizer = self._env.save()['obs_normalizer']
+            what_to_save['obs_normalizer'] = obs_normalizer
         self._logger.setup_torch_saver(what_to_save)
         self._logger.torch_save()
 
diff --git a/omnisafe/common/logger.py b/omnisafe/common/logger.py
index 73a6b41a1..0a7b56892 100644
--- a/omnisafe/common/logger.py
+++ b/omnisafe/common/logger.py
@@ -15,6 +15,7 @@
 """Implementation of the Logger."""
 
 import atexit
+import csv
 import os
 import time
 from collections import deque
@@ -96,7 +97,7 @@ def __init__(  # pylint: disable=too-many-arguments,too-many-locals
         self,
         output_dir: str,
         exp_name: str,
-        output_fname: str = 'progress.txt',
+        output_fname: str = 'progress.csv',
         verbose: bool = True,
         seed: int = 0,
         use_tensorboard: bool = True,
@@ -123,6 +124,7 @@
         )
         atexit.register(self._output_file.close)
         self.log(f'Logging data to {self._output_file.name}', 'cyan', bold=True)
+        self._csv_writer = csv.writer(self._output_file)
 
         self._epoch: int = 0
         self._first_row: bool = True
@@ -277,9 +279,9 @@ def dump_tabular(self) -> None:
             self._proc_bar.update(1)
 
         if self._first_row:
-            self._output_file.write(' '.join(self._current_row.keys()) + '\n')
+            self._csv_writer.writerow(self._current_row.keys())
             self._first_row = False
-        self._output_file.write(' '.join(map(str, self._current_row.values())) + '\n')
+        self._csv_writer.writerow(self._current_row.values())
         self._output_file.flush()
 
         if self._use_tensorboard:
diff --git a/omnisafe/configs/on-policy/CPO.yaml b/omnisafe/configs/on-policy/CPO.yaml
index 36054c030..9caa93101 100644
--- a/omnisafe/configs/on-policy/CPO.yaml
+++ b/omnisafe/configs/on-policy/CPO.yaml
@@ -37,7 +37,7 @@ defaults:
    # batch size for each iteration
    batch_size: 16384
    # target kl divergence
-   target_kl: 0.01
+   target_kl: 0.02
    # entropy coefficient
    entropy_coef: 0.0
    # normalize reward
diff --git a/omnisafe/envs/wrapper.py b/omnisafe/envs/wrapper.py
index 0a774a1d8..70f658c9a 100644
--- a/omnisafe/envs/wrapper.py
+++ b/omnisafe/envs/wrapper.py
@@ -260,7 +260,7 @@ def step(
         action = self._old_min_action + (self._old_max_action - self._old_min_action) * (
             action - self._min_action
         ) / (self._max_action - self._min_action)
-        return super().step(action)
+        return super().step(action.numpy())
 
 
 class Unsqueeze(Wrapper):
@@ -283,6 +283,7 @@ def __init__(self, env: CMDP) -> None:
     def step(
         self, action: torch.Tensor
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Dict]:
+        action = action.squeeze(0)
         obs, reward, cost, terminated, truncated, info = super().step(action)
         obs, reward, cost, terminated, truncated = map(
             lambda x: x.unsqueeze(0), (obs, reward, cost, terminated, truncated)
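A note on the `Logger` change above: rows written with `' '.join(...)` break as soon as a value contains a space, and downstream tools have to guess the delimiter, while `csv.writer` quotes and escapes such fields automatically. Below is a minimal standalone sketch of the header-then-rows pattern the patched `dump_tabular` uses; the file name and metric keys here are illustrative, not taken from the patch:

```python
import csv

# Illustrative stand-in for Logger._current_row; keys and values are made up.
current_row = {'Train/Epoch': 1, 'Metrics/EpRet': 12.5, 'Metrics/EpCost': 0.3}

with open('progress.csv', 'w', newline='') as output_file:
    writer = csv.writer(output_file)
    writer.writerow(current_row.keys())    # header, written once (_first_row)
    writer.writerow(current_row.values())  # one data row per dump_tabular() call

# The payoff of real CSV: the file reads back without delimiter guessing.
with open('progress.csv', newline='') as csv_file:
    for row in csv.DictReader(csv_file):
        print(row['Metrics/EpRet'])  # prints '12.5'
```

`writerow` accepts any iterable, so passing `dict_keys` and `dict_values` directly, as the patch does, works as-is; `newline=''` is the csv module's documented way to avoid spurious blank lines on Windows. The real `Logger` differs from this sketch in that it opens the file once at construction, registers the close with `atexit`, and flushes after every epoch.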
diff --git a/omnisafe/utils/math.py b/omnisafe/utils/math.py
index b8e936f5f..0e43c467c 100644
--- a/omnisafe/utils/math.py
+++ b/omnisafe/utils/math.py
@@ -73,7 +73,7 @@ def gaussian_kl(
         \mu_q) - k + log(\frac{det(\Sigma_p)}{det(\Sigma_q)}))
 
     where :math:`\mu_p` and :math:`\mu_q` are the mean of :math:`p` and :math:`q`, respectively.
-    :math:`\Sigma_p` and :math:`\Sigma_q` are the co-variance of :math:`p` and :math:`q`, respectively.
+    :math:`\Sigma_p` and :math:`\Sigma_q` are the covariance of :math:`p` and :math:`q`, respectively.
     :math:`k` is the dimension of the distribution.
 
     For more details,
@@ -83,8 +83,8 @@
     Args:
         mean_p (torch.Tensor): mean of the first distribution, shape (B, n)
         mean_q (torch.Tensor): mean of the second distribution, shape (B, n)
-        var_p (torch.Tensor): co-variance of the first distribution, shape (B, n, n)
-        var_q (torch.Tensor): co-variance of the second distribution, shape (B, n, n)
+        var_p (torch.Tensor): covariance of the first distribution, shape (B, n, n)
+        var_q (torch.Tensor): covariance of the second distribution, shape (B, n, n)
     """
     len_q = var_q.size(-1)
     mean_p = mean_p.unsqueeze(-1)  # (B, n, 1)
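For reference on the `gaussian_kl` docstring touched above: the standard closed form for the KL divergence between two multivariate Gaussians p = N(mu_p, Sigma_p) and q = N(mu_q, Sigma_q) in dimension k is

KL(p || q) = 1/2 * (tr(Sigma_q^{-1} Sigma_p) + (mu_q - mu_p)^T Sigma_q^{-1} (mu_q - mu_p) - k + log(det(Sigma_q) / det(Sigma_p)))

The sketch below implements that textbook direction and cross-checks it against `torch.distributions`. It is an independent illustration, not omnisafe's `gaussian_kl` (whose exact convention, including the determinant ratio, is the one stated in its own docstring), and the helper name `mvn_kl` is hypothetical:

```python
import torch
from torch.distributions import MultivariateNormal, kl_divergence


def mvn_kl(mean_p, mean_q, var_p, var_q):
    """Textbook KL(p || q) for batched Gaussians: means (B, n), covariances (B, n, n)."""
    k = mean_p.size(-1)
    diff = (mean_q - mean_p).unsqueeze(-1)   # (B, n, 1)
    var_q_inv = torch.linalg.inv(var_q)      # (B, n, n)
    trace = torch.diagonal(var_q_inv @ var_p, dim1=-2, dim2=-1).sum(-1)
    maha = (diff.transpose(-2, -1) @ var_q_inv @ diff).reshape(-1)
    logdet = torch.logdet(var_q) - torch.logdet(var_p)
    return 0.5 * (trace + maha - k + logdet)


# Cross-check against torch.distributions on random SPD covariance matrices.
batch, n = 4, 3
mean_p, mean_q = torch.randn(batch, n), torch.randn(batch, n)
a, b = torch.randn(batch, n, n), torch.randn(batch, n, n)
var_p = a @ a.transpose(-2, -1) + torch.eye(n)  # A @ A.T + I is SPD
var_q = b @ b.transpose(-2, -1) + torch.eye(n)
expected = kl_divergence(
    MultivariateNormal(mean_p, covariance_matrix=var_p),
    MultivariateNormal(mean_q, covariance_matrix=var_q),
)
assert torch.allclose(mvn_kl(mean_p, mean_q, var_p, var_q), expected, atol=1e-4)
```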