From badc42e63a808cd5ec129423c547c60b3a7cca43 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Wed, 8 Mar 2023 00:04:02 +0800 Subject: [PATCH 1/7] feat: support policy evaluation --- examples/evaluate_saved_policy.py | 3 +- omnisafe/__init__.py | 1 + omnisafe/adapter/online_adapter.py | 30 +++- omnisafe/envs/wrapper.py | 3 + omnisafe/evaluator.py | 252 +++++++++++++++++++++++++++-- 5 files changed, 272 insertions(+), 17 deletions(-) diff --git a/examples/evaluate_saved_policy.py b/examples/evaluate_saved_policy.py index 14363f7dd..783e45317 100644 --- a/examples/evaluate_saved_policy.py +++ b/examples/evaluate_saved_policy.py @@ -28,4 +28,5 @@ for item in os.scandir(os.path.join(LOG_DIR, 'torch_save')): if item.is_file() and item.name.split('.')[-1] == 'pt': evaluator.load_saved_model(save_dir=LOG_DIR, model_name=item.name) - evaluator.render(num_episode=10, camera_name='track', width=256, height=256) + evaluator.render(num_episodes=10, camera_name='track', width=256, height=256) + evaluator.evaluate(num_episodes=10) diff --git a/omnisafe/__init__.py b/omnisafe/__init__.py index 71973dc66..461d394e4 100644 --- a/omnisafe/__init__.py +++ b/omnisafe/__init__.py @@ -17,6 +17,7 @@ from omnisafe import algorithms from omnisafe.algorithms import ALGORITHMS from omnisafe.algorithms.algo_wrapper import AlgoWrapper as Agent +from omnisafe.evaluator import Evaluator # from omnisafe.algorithms.env_wrapper import EnvWrapper as Env from omnisafe.version import __version__ diff --git a/omnisafe/adapter/online_adapter.py b/omnisafe/adapter/online_adapter.py index f8f483ed3..786ad7579 100644 --- a/omnisafe/adapter/online_adapter.py +++ b/omnisafe/adapter/online_adapter.py @@ -41,11 +41,12 @@ def __init__( # pylint: disable=too-many-arguments num_envs: int, seed: int, cfgs: Config, + **env_kwargs: Dict, ) -> None: assert env_id in support_envs(), f'Env {env_id} is not supported.' self._env_id = env_id - self._env = make(env_id, num_envs=num_envs) + self._env = make(env_id, num_envs=num_envs, **env_kwargs) self._wrapper( obs_normalize=cfgs.algo_cfgs.obs_normalize, reward_normalize=cfgs.algo_cfgs.reward_normalize, @@ -75,6 +76,33 @@ def _wrapper( if self._env.num_envs == 1: self._env = Unsqueeze(self._env) + def load(self, obs_normlizer_dict): # pylint: disable=unused-argument + """Load the environment. + + Args: + obs_normlizer_dict (Dict): the dict of the observation normalizer. + """ + assert self._cfgs.algo_cfgs.obs_normalize, 'The observation normalizer is not loaded.' + self._env.load(obs_normlizer_dict) + + def render(self) -> None: + """Render the environment.""" + return self._env.render() + + @property + def fps(self) -> int: + """The fps of the environment. + + Returns: + int: the fps. + """ + try: + fps = self._env.metadata['render_fps'] + except: + fps = 30 + Warning('No fps information, set to 30') + return fps + @property def action_space(self) -> OmnisafeSpace: """The action space of the environment. diff --git a/omnisafe/envs/wrapper.py b/omnisafe/envs/wrapper.py index 70f658c9a..33e047688 100644 --- a/omnisafe/envs/wrapper.py +++ b/omnisafe/envs/wrapper.py @@ -137,6 +137,9 @@ def save(self) -> Dict[str, torch.nn.Module]: saved['obs_normalizer'] = self._obs_normalizer return saved + def load(self, obs_normalizer_dict: dict) -> None: + self._obs_normalizer.load_state_dict(obs_normalizer_dict) + class RewardNormalize(Wrapper): """Normalize the reward. diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index 0b73669a7..0ee5657f1 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -14,36 +14,258 @@ # ============================================================================== """Implementation of Evaluator.""" +import json +import os + +import numpy as np +import torch +from gymnasium.spaces import Discrete +from gymnasium.utils.save_video import save_video + +from omnisafe.adapter.online_adapter import OnlineAdapter as EnvWrapper +from omnisafe.models.actor import ActorBuilder +from omnisafe.utils.config import Config + class Evaluator: # pylint: disable=too-many-instance-attributes """This class includes common evaluation methods for safe RL algorithms.""" - def __init__(self) -> None: - pass + # pylint: disable-next=too-many-arguments + def __init__( + self, + env=None, + actor=None, + obs_normalize=None, + play=True, + save_replay=True, + ): + """Initialize the evaluator. + + Args: + env (gymnasium.Env): the environment. if None, the environment will be created from the config. + pi (omnisafe.algos.models.actor.Actor): the policy. if None, the policy will be created from the config. + obs_normalize (omnisafe.algos.models.obs_normalize): the observation Normalize. + """ + # set the attributes + self.env = env + self.actor = actor + self.obs_normalizer = obs_normalize if obs_normalize is not None else lambda x: x + self.env_wrapper_class = type(env) if env is not None else None + + # used when load model from saved file. + self.cfgs = None + self.save_dir = None + self.model_name = None + self.algo_name = None + self.model_params = None - def load_saved_model(self, save_dir: str, model_name: str) -> None: - """Load saved model from save_dir. + # set the render mode + self.play = play + self.save_replay = save_replay + self.set_render_mode(play, save_replay) + + def set_render_mode(self, play: bool = True, save_replay: bool = True): + """Set the render mode. Args: - save_dir (str): The directory of saved model. - model_name (str): The name of saved model. + render_mode (str): render mode. + """ + # set the render mode + if play and save_replay: + self.render_mode = 'rgb_array' + elif play and not save_replay: + self.render_mode = 'human' + elif not play and save_replay: + self.render_mode = 'rgb_array_list' + else: + self.render_mode = None + + # pylint: disable-next=too-many-locals + def load_saved_model(self, save_dir: str, model_name: str): + """Load a saved model. + Args: + save_dir (str): directory where the model is saved. + model_name (str): name of the model. """ + # load the config + self.save_dir = save_dir + self.model_name = model_name + cfg_path = os.path.join(save_dir, 'config.json') + try: + with open(cfg_path, encoding='utf-8') as file: + kwargs = json.load(file) + except FileNotFoundError as error: + raise FileNotFoundError( + 'The config file is not found in the save directory.' + ) from error + self.cfgs = Config.dict2config(kwargs) + + # load the saved model + model_path = os.path.join(save_dir, 'torch_save', model_name) + try: + self.model_params = torch.load(model_path) + except FileNotFoundError as error: + raise FileNotFoundError('The model is not found in the save directory.') from error + + self.algo_name = self.cfgs['exp_name'].split('-')[0] + # make the environment + env_id = self.cfgs['env_id'] + self.env = self._make_env(env_id, render_mode=self.render_mode) - def load_running_model(self, env, actor) -> None: - """Load running model from env and actor. + # make the actor + observation_space = self.env.observation_space + action_space = self.env.action_space + + act_space_type = 'discrete' if isinstance(action_space, Discrete) else 'continuous' + actor_type = self.cfgs['model_cfgs']['actor_type'] + + pi_cfg = self.cfgs['model_cfgs']['actor'] + weight_initialization_mode = self.cfgs['model_cfgs']['weight_initialization_mode'] + actor_builder = ActorBuilder( + obs_space=observation_space, + act_space=action_space, + hidden_sizes=pi_cfg['hidden_sizes'], + activation=pi_cfg['activation'], + weight_initialization_mode=weight_initialization_mode, + ) + if act_space_type == 'discrete': + self.actor = actor_builder.build_actor('categorical') + else: + self.actor = actor_builder.build_actor(actor_type) + self.actor.load_state_dict(self.model_params['pi']) + + # pylint: disable-next=too-many-locals + def evaluate( + self, + num_episodes: int = 10, + cost_criteria: float = 1.0, + ): + """Evaluate the agent for num_episodes episodes. Args: - env (gym.Env): The environment. - actor (omnisafe.actor.Actor): The actor. + num_episodes (int): number of episodes to evaluate the agent. + cost_criteria (float): the cost criteria for the evaluation. + Returns: + episode_rewards (list): list of episode rewards. + episode_costs (list): list of episode costs. + episode_lengths (list): list of episode lengths. """ + if self.env is None or self.actor is None: + raise ValueError( + 'The environment and the policy must be provided or created before evaluating the agent.' + ) - def evaluate(self, num_episode: int, render: bool = False) -> None: - """Evaluate the model. + episode_rewards = [] + episode_costs = [] + episode_lengths = [] + horizon = 1000 - Args: - num_episode (int): The number of episodes to evaluate. - render (bool): Whether to render the environment. + for episode in range(num_episodes): + obs, _ = self.env.reset() + ep_ret, ep_cost = 0.0, 0.0 + + for step in range(horizon): + with torch.no_grad(): + act = self.actor.predict( + torch.as_tensor(obs, dtype=torch.float32), + deterministic=True, + ) + obs, rew, cost, _, _, _ = self.env.step(act) + ep_ret += rew + ep_cost += (cost_criteria**step) * cost + episode_costs.append(ep_cost.numpy().mean()) + episode_rewards.append(ep_ret.numpy().mean()) + episode_lengths.append(step) + print(f'Episode {episode+1} results:') + print(f'Episode reward: {ep_ret.numpy().mean()}') + print(f'Episode cost: {ep_cost.numpy().mean()}') + print(f'Episode length: {step+1}') + print('Evaluation results:') + print(f'Average episode reward: {np.mean(episode_rewards)}') + print(f'Average episode cost: {np.mean(episode_costs)}') + print(f'Average episode length: {np.mean(episode_lengths)+1}') + return ( + episode_rewards, + episode_costs, + ) + + def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branches,too-many-statements + self, + num_episodes: int = 0, + play=True, + save_replay_path: str = None, + camera_name: str = None, + camera_id: str = None, + width: int = None, + height: int = None, + ): + """Render the environment for one episode. + Args: + seed (int): seed for the environment. If None, the environment will be reset with a random seed. + save_replay_path (str): path to save the replay. If None, no replay is saved. """ + + if save_replay_path is None: + save_replay_path = os.path.join(self.save_dir, 'video', self.model_name.split('.')[0]) + + # remake the environment if the render mode can not support needed play or save_replay + if self.env is None or self.actor is None: + raise ValueError( + 'The environment and the policy must be provided or created before evaluating the agent.' + ) + self.set_render_mode(play, save_replay_path is not None) + print(f'Render mode: {self.render_mode}') + width = self.env.width if width is None else width + height = self.env.height if height is None else height + env_kwargs = { + 'env_id': self.cfgs['env_id'], + 'render_mode': self.render_mode, + 'camera_id': camera_id, + 'camera_name': camera_name, + 'width': width, + 'height': height, + } + self.env = self._make_env(**env_kwargs) + if self.cfgs['algo_cfgs']['obs_normalize']: + self.env.load(self.model_params['obs_normalizer']) + horizon = 1000 + frames = [] + obs, _ = self.env.reset() + if self.render_mode == 'human': + self.env.render() + elif self.render_mode == 'rgb_array': + frames.append(self.env.render()) + for episode_idx in range(num_episodes): + for _ in range(horizon): + with torch.no_grad(): + act = self.actor.predict(obs, deterministic=True) + obs, _, _, done, truncated, _ = self.env.step(act.cpu().squeeze()) + if done[0] or truncated[0]: + break + if self.render_mode == 'rgb_array': + frames.append(self.env.render()) + + if self.render_mode == 'rgb_array_list': + frames = self.env.render() + if save_replay_path is not None: + save_video( + frames, + save_replay_path, + fps=self.env.fps, + episode_trigger=lambda x: True, + video_length=horizon, + episode_index=episode_idx, + name_prefix='eval', + ) + self.env.reset() + frames = [] + + def _make_env(self, env_id, **env_kwargs): + """Make wrapped environment.""" + + return EnvWrapper( + env_id, self.cfgs.train_cfgs.vector_env_nums, self.cfgs.seed, self.cfgs, **env_kwargs + ) From abdd6114c709546f97cd83c56f8984dc53a26298 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Wed, 8 Mar 2023 00:22:33 +0800 Subject: [PATCH 2/7] wip --- omnisafe/adapter/online_adapter.py | 6 ++++-- omnisafe/envs/wrapper.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/omnisafe/adapter/online_adapter.py b/omnisafe/adapter/online_adapter.py index 786ad7579..7dffcfd90 100644 --- a/omnisafe/adapter/online_adapter.py +++ b/omnisafe/adapter/online_adapter.py @@ -14,6 +14,7 @@ # ============================================================================== """Online Adapter for OmniSafe.""" +import warnings from typing import Dict, Tuple import torch @@ -98,9 +99,10 @@ def fps(self) -> int: """ try: fps = self._env.metadata['render_fps'] - except: + except KeyError: fps = 30 - Warning('No fps information, set to 30') + warnings.warn('The fps is not found, use 30 as default.') + return fps @property diff --git a/omnisafe/envs/wrapper.py b/omnisafe/envs/wrapper.py index 33e047688..4d22f6734 100644 --- a/omnisafe/envs/wrapper.py +++ b/omnisafe/envs/wrapper.py @@ -138,6 +138,7 @@ def save(self) -> Dict[str, torch.nn.Module]: return saved def load(self, obs_normalizer_dict: dict) -> None: + """Load the normalizer.""" self._obs_normalizer.load_state_dict(obs_normalizer_dict) From 2042d07d485f8e1f484259b143d8ec66a53c4b9a Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Wed, 8 Mar 2023 01:39:34 +0800 Subject: [PATCH 3/7] refactor: change evaluator building --- examples/evaluate_saved_policy.py | 8 ++- omnisafe/adapter/online_adapter.py | 16 ------ omnisafe/evaluator.py | 92 +++++++++++++++++------------- 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/examples/evaluate_saved_policy.py b/examples/evaluate_saved_policy.py index 783e45317..8ef66251c 100644 --- a/examples/evaluate_saved_policy.py +++ b/examples/evaluate_saved_policy.py @@ -27,6 +27,8 @@ evaluator = omnisafe.Evaluator() for item in os.scandir(os.path.join(LOG_DIR, 'torch_save')): if item.is_file() and item.name.split('.')[-1] == 'pt': - evaluator.load_saved_model(save_dir=LOG_DIR, model_name=item.name) - evaluator.render(num_episodes=10, camera_name='track', width=256, height=256) - evaluator.evaluate(num_episodes=10) + evaluator.load_saved_model( + save_dir=LOG_DIR, model_name=item.name, camera_name='track', width=256, height=256 + ) + evaluator.render(num_episodes=1) + evaluator.evaluate(num_episodes=1) diff --git a/omnisafe/adapter/online_adapter.py b/omnisafe/adapter/online_adapter.py index 7dffcfd90..efea707e9 100644 --- a/omnisafe/adapter/online_adapter.py +++ b/omnisafe/adapter/online_adapter.py @@ -14,7 +14,6 @@ # ============================================================================== """Online Adapter for OmniSafe.""" -import warnings from typing import Dict, Tuple import torch @@ -90,21 +89,6 @@ def render(self) -> None: """Render the environment.""" return self._env.render() - @property - def fps(self) -> int: - """The fps of the environment. - - Returns: - int: the fps. - """ - try: - fps = self._env.metadata['render_fps'] - except KeyError: - fps = 30 - warnings.warn('The fps is not found, use 30 as default.') - - return fps - @property def action_space(self) -> OmnisafeSpace: """The action space of the environment. diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index 0ee5657f1..f7c7edd22 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -16,13 +16,17 @@ import json import os +import warnings +from typing import Optional import numpy as np import torch from gymnasium.spaces import Discrete from gymnasium.utils.save_video import save_video -from omnisafe.adapter.online_adapter import OnlineAdapter as EnvWrapper +from omnisafe.common import Normalizer +from omnisafe.envs.core import make +from omnisafe.envs.wrapper import ActionScale, ObsNormalize, TimeLimit, Unsqueeze from omnisafe.models.actor import ActorBuilder from omnisafe.utils.config import Config @@ -81,7 +85,15 @@ def set_render_mode(self, play: bool = True, save_replay: bool = True): self.render_mode = None # pylint: disable-next=too-many-locals - def load_saved_model(self, save_dir: str, model_name: str): + def load_saved_model( + self, + save_dir: str, + model_name: str, + camera_name: Optional[str] = None, + camera_id: Optional[int] = None, + width: Optional[int] = None, + height: Optional[int] = None, + ): """Load a saved model. Args: @@ -109,9 +121,28 @@ def load_saved_model(self, save_dir: str, model_name: str): raise FileNotFoundError('The model is not found in the save directory.') from error self.algo_name = self.cfgs['exp_name'].split('-')[0] - # make the environment - env_id = self.cfgs['env_id'] - self.env = self._make_env(env_id, render_mode=self.render_mode) + + width = self.env.width if width is None else width + height = self.env.height if height is None else height + env_kwargs = { + 'env_id': self.cfgs['env_id'], + 'num_envs': self.cfgs['train_cfgs']['vector_env_nums'], + 'render_mode': self.render_mode, + 'camera_id': camera_id, + 'camera_name': camera_name, + 'width': width, + 'height': height, + } + self.env = make(**env_kwargs) + if self.cfgs['algo_cfgs']['obs_normalize']: + obs_normalizer = Normalizer(shape=self.env.observation_space.shape, clip=5) + obs_normalizer.load_state_dict(self.model_params['obs_normalizer']) + self.env = ObsNormalize(self.env, obs_normalizer) + if self.env.need_time_limit_wrapper: + self.env = TimeLimit(self.env, time_limit=1000) + self.env = ActionScale(self.env, low=-1.0, high=1.0) + if self.env.num_envs == 1: + self.env = Unsqueeze(self.env) # make the actor observation_space = self.env.observation_space @@ -191,15 +222,25 @@ def evaluate( episode_costs, ) + @property + def fps(self) -> int: + """The fps of the environment. + + Returns: + int: the fps. + """ + try: + fps = self.env.metadata['render_fps'] + except AttributeError: + fps = 30 + warnings.warn('The fps is not found, use 30 as default.') + + return fps + def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branches,too-many-statements self, num_episodes: int = 0, - play=True, - save_replay_path: str = None, - camera_name: str = None, - camera_id: str = None, - width: int = None, - height: int = None, + save_replay_path: Optional[str] = None, ): """Render the environment for one episode. @@ -211,26 +252,6 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc if save_replay_path is None: save_replay_path = os.path.join(self.save_dir, 'video', self.model_name.split('.')[0]) - # remake the environment if the render mode can not support needed play or save_replay - if self.env is None or self.actor is None: - raise ValueError( - 'The environment and the policy must be provided or created before evaluating the agent.' - ) - self.set_render_mode(play, save_replay_path is not None) - print(f'Render mode: {self.render_mode}') - width = self.env.width if width is None else width - height = self.env.height if height is None else height - env_kwargs = { - 'env_id': self.cfgs['env_id'], - 'render_mode': self.render_mode, - 'camera_id': camera_id, - 'camera_name': camera_name, - 'width': width, - 'height': height, - } - self.env = self._make_env(**env_kwargs) - if self.cfgs['algo_cfgs']['obs_normalize']: - self.env.load(self.model_params['obs_normalizer']) horizon = 1000 frames = [] obs, _ = self.env.reset() @@ -254,7 +275,7 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc save_video( frames, save_replay_path, - fps=self.env.fps, + fps=self.fps, episode_trigger=lambda x: True, video_length=horizon, episode_index=episode_idx, @@ -262,10 +283,3 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc ) self.env.reset() frames = [] - - def _make_env(self, env_id, **env_kwargs): - """Make wrapped environment.""" - - return EnvWrapper( - env_id, self.cfgs.train_cfgs.vector_env_nums, self.cfgs.seed, self.cfgs, **env_kwargs - ) From f6f420880f6ba0e76d03d9b4b4c2ed59495866d5 Mon Sep 17 00:00:00 2001 From: ruiyang sun Date: Wed, 8 Mar 2023 16:13:12 +0800 Subject: [PATCH 4/7] refactor(evaluate) --- omnisafe/adapter/online_adapter.py | 16 +- omnisafe/envs/core.py | 10 + omnisafe/envs/safety_gymnasium_env.py | 1 + omnisafe/envs/wrapper.py | 4 - omnisafe/evaluator.py | 252 ++++++++++++++------------ 5 files changed, 147 insertions(+), 136 deletions(-) diff --git a/omnisafe/adapter/online_adapter.py b/omnisafe/adapter/online_adapter.py index efea707e9..f8f483ed3 100644 --- a/omnisafe/adapter/online_adapter.py +++ b/omnisafe/adapter/online_adapter.py @@ -41,12 +41,11 @@ def __init__( # pylint: disable=too-many-arguments num_envs: int, seed: int, cfgs: Config, - **env_kwargs: Dict, ) -> None: assert env_id in support_envs(), f'Env {env_id} is not supported.' self._env_id = env_id - self._env = make(env_id, num_envs=num_envs, **env_kwargs) + self._env = make(env_id, num_envs=num_envs) self._wrapper( obs_normalize=cfgs.algo_cfgs.obs_normalize, reward_normalize=cfgs.algo_cfgs.reward_normalize, @@ -76,19 +75,6 @@ def _wrapper( if self._env.num_envs == 1: self._env = Unsqueeze(self._env) - def load(self, obs_normlizer_dict): # pylint: disable=unused-argument - """Load the environment. - - Args: - obs_normlizer_dict (Dict): the dict of the observation normalizer. - """ - assert self._cfgs.algo_cfgs.obs_normalize, 'The observation normalizer is not loaded.' - self._env.load(obs_normlizer_dict) - - def render(self) -> None: - """Render the environment.""" - return self._env.render() - @property def action_space(self) -> OmnisafeSpace: """The action space of the environment. diff --git a/omnisafe/envs/core.py b/omnisafe/envs/core.py index 52489f8ac..750d44831 100644 --- a/omnisafe/envs/core.py +++ b/omnisafe/envs/core.py @@ -42,6 +42,7 @@ class CMDP(ABC): _support_envs: List[str] _action_space: OmnisafeSpace _observation_space: OmnisafeSpace + _metadata: Dict[str, Any] _num_envs: int _time_limit: Optional[int] = None @@ -86,6 +87,15 @@ def observation_space(self) -> OmnisafeSpace: """ return self._observation_space + @property + def metadata(self) -> Dict[str, Any]: + """The metadata of the environment. + + Returns: + Dict[str, Any]: the metadata. + """ + return self._metadata + @property def num_envs(self) -> int: """The parallel environments. diff --git a/omnisafe/envs/safety_gymnasium_env.py b/omnisafe/envs/safety_gymnasium_env.py index f35e13afd..09ca9971b 100644 --- a/omnisafe/envs/safety_gymnasium_env.py +++ b/omnisafe/envs/safety_gymnasium_env.py @@ -85,6 +85,7 @@ def __init__(self, env_id: str, num_envs: int = 1, **kwargs) -> None: self._observation_space = self._env.observation_space self._num_envs = num_envs + self._metadata = self._env.metadata def step( self, action: torch.Tensor diff --git a/omnisafe/envs/wrapper.py b/omnisafe/envs/wrapper.py index 4d22f6734..70f658c9a 100644 --- a/omnisafe/envs/wrapper.py +++ b/omnisafe/envs/wrapper.py @@ -137,10 +137,6 @@ def save(self) -> Dict[str, torch.nn.Module]: saved['obs_normalizer'] = self._obs_normalizer return saved - def load(self, obs_normalizer_dict: dict) -> None: - """Load the normalizer.""" - self._obs_normalizer.load_state_dict(obs_normalizer_dict) - class RewardNormalize(Wrapper): """Normalize the reward. diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index f7c7edd22..cf0e0c1fa 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -17,17 +17,18 @@ import json import os import warnings -from typing import Optional +from typing import Any, Dict, List, Optional import numpy as np import torch -from gymnasium.spaces import Discrete +from gymnasium.spaces import Box from gymnasium.utils.save_video import save_video from omnisafe.common import Normalizer -from omnisafe.envs.core import make -from omnisafe.envs.wrapper import ActionScale, ObsNormalize, TimeLimit, Unsqueeze +from omnisafe.envs.core import CMDP, make +from omnisafe.envs.wrapper import ActionScale, ObsNormalize, TimeLimit from omnisafe.models.actor import ActorBuilder +from omnisafe.models.base import Actor from omnisafe.utils.config import Config @@ -37,11 +38,8 @@ class Evaluator: # pylint: disable=too-many-instance-attributes # pylint: disable-next=too-many-arguments def __init__( self, - env=None, - actor=None, - obs_normalize=None, - play=True, - save_replay=True, + play: bool = True, + save_replay: bool = True, ): """Initialize the evaluator. @@ -51,24 +49,20 @@ def __init__( obs_normalize (omnisafe.algos.models.obs_normalize): the observation Normalize. """ # set the attributes - self.env = env - self.actor = actor - self.obs_normalizer = obs_normalize if obs_normalize is not None else lambda x: x - self.env_wrapper_class = type(env) if env is not None else None + self._env: CMDP + self._actor: Actor # used when load model from saved file. - self.cfgs = None - self.save_dir = None - self.model_name = None - self.algo_name = None - self.model_params = None + self._cfgs: Config + self._save_dir: str + self._model_name: str # set the render mode - self.play = play - self.save_replay = save_replay - self.set_render_mode(play, save_replay) + self._play = play + self._save_replay = save_replay + self.__set_render_mode(play, save_replay) - def set_render_mode(self, play: bool = True, save_replay: bool = True): + def __set_render_mode(self, play: bool = True, save_replay: bool = True): """Set the render mode. Args: @@ -76,33 +70,20 @@ def set_render_mode(self, play: bool = True, save_replay: bool = True): """ # set the render mode if play and save_replay: - self.render_mode = 'rgb_array' + self._render_mode = 'rgb_array' elif play and not save_replay: - self.render_mode = 'human' + self._render_mode = 'human' elif not play and save_replay: - self.render_mode = 'rgb_array_list' + self._render_mode = 'rgb_array_list' else: - self.render_mode = None + raise NotImplementedError('The render mode is not implemented.') - # pylint: disable-next=too-many-locals - def load_saved_model( - self, - save_dir: str, - model_name: str, - camera_name: Optional[str] = None, - camera_id: Optional[int] = None, - width: Optional[int] = None, - height: Optional[int] = None, - ): - """Load a saved model. + def __load_cfgs(self, save_dir: str): + """Load the config from the save directory. Args: save_dir (str): directory where the model is saved. - model_name (str): name of the model. """ - # load the config - self.save_dir = save_dir - self.model_name = model_name cfg_path = os.path.join(save_dir, 'config.json') try: with open(cfg_path, encoding='utf-8') as file: @@ -111,48 +92,42 @@ def load_saved_model( raise FileNotFoundError( 'The config file is not found in the save directory.' ) from error - self.cfgs = Config.dict2config(kwargs) + self._cfgs = Config.dict2config(kwargs) + def __load_model_and_env(self, save_dir: str, model_name: str, env_kwargs: Dict[str, Any]): + """Load the model from the save directory. + + Args: + save_dir (str): directory where the model is saved. + model_name (str): name of the model. + """ # load the saved model model_path = os.path.join(save_dir, 'torch_save', model_name) try: - self.model_params = torch.load(model_path) + model_params = torch.load(model_path) except FileNotFoundError as error: raise FileNotFoundError('The model is not found in the save directory.') from error - self.algo_name = self.cfgs['exp_name'].split('-')[0] + # load the environment + self._env = make(**env_kwargs) - width = self.env.width if width is None else width - height = self.env.height if height is None else height - env_kwargs = { - 'env_id': self.cfgs['env_id'], - 'num_envs': self.cfgs['train_cfgs']['vector_env_nums'], - 'render_mode': self.render_mode, - 'camera_id': camera_id, - 'camera_name': camera_name, - 'width': width, - 'height': height, - } - self.env = make(**env_kwargs) - if self.cfgs['algo_cfgs']['obs_normalize']: - obs_normalizer = Normalizer(shape=self.env.observation_space.shape, clip=5) - obs_normalizer.load_state_dict(self.model_params['obs_normalizer']) - self.env = ObsNormalize(self.env, obs_normalizer) - if self.env.need_time_limit_wrapper: - self.env = TimeLimit(self.env, time_limit=1000) - self.env = ActionScale(self.env, low=-1.0, high=1.0) - if self.env.num_envs == 1: - self.env = Unsqueeze(self.env) - - # make the actor - observation_space = self.env.observation_space - action_space = self.env.action_space - - act_space_type = 'discrete' if isinstance(action_space, Discrete) else 'continuous' - actor_type = self.cfgs['model_cfgs']['actor_type'] - - pi_cfg = self.cfgs['model_cfgs']['actor'] - weight_initialization_mode = self.cfgs['model_cfgs']['weight_initialization_mode'] + observation_space = self._env.observation_space + action_space = self._env.action_space + + assert isinstance(observation_space, Box), 'The observation space must be Box.' + assert isinstance(action_space, Box), 'The action space must be Box.' + + if self._cfgs['algo_cfgs']['obs_normalize']: + obs_normalizer = Normalizer(shape=observation_space.shape, clip=5) + obs_normalizer.load_state_dict(model_params['obs_normalizer']) + self._env = ObsNormalize(self._env, obs_normalizer) + if self._env.need_time_limit_wrapper: + self._env = TimeLimit(self._env, time_limit=1000) + self._env = ActionScale(self._env, low=-1.0, high=1.0) + + actor_type = self._cfgs['model_cfgs']['actor_type'] + pi_cfg = self._cfgs['model_cfgs']['actor'] + weight_initialization_mode = self._cfgs['model_cfgs']['weight_initialization_mode'] actor_builder = ActorBuilder( obs_space=observation_space, act_space=action_space, @@ -160,13 +135,43 @@ def load_saved_model( activation=pi_cfg['activation'], weight_initialization_mode=weight_initialization_mode, ) - if act_space_type == 'discrete': - self.actor = actor_builder.build_actor('categorical') - else: - self.actor = actor_builder.build_actor(actor_type) - self.actor.load_state_dict(self.model_params['pi']) + self._actor = actor_builder.build_actor(actor_type) + self._actor.load_state_dict(model_params['pi']) # pylint: disable-next=too-many-locals + def load_saved( + self, + save_dir: str, + model_name: str, + camera_name: Optional[str] = None, + camera_id: Optional[int] = None, + width: Optional[int] = None, + height: Optional[int] = None, + ): + """Load a saved model. + + Args: + save_dir (str): directory where the model is saved. + model_name (str): name of the model. + """ + # load the config + self._save_dir = save_dir + self._model_name = model_name + + self.__load_cfgs(save_dir) + + env_kwargs = { + 'env_id': self._cfgs['env_id'], + 'num_envs': 1, + 'render_mode': self._render_mode, + 'camera_id': camera_id, + 'camera_name': camera_name, + 'width': width, + 'height': height, + } + + self.__load_model_and_env(save_dir, model_name, env_kwargs) + def evaluate( self, num_episodes: int = 10, @@ -183,36 +188,44 @@ def evaluate( episode_costs (list): list of episode costs. episode_lengths (list): list of episode lengths. """ - if self.env is None or self.actor is None: + if self._env is None or self._actor is None: raise ValueError( 'The environment and the policy must be provided or created before evaluating the agent.' ) - episode_rewards = [] - episode_costs = [] - episode_lengths = [] - horizon = 1000 + episode_rewards: List[float] = [] + episode_costs: List[float] = [] + episode_lengths: List[float] = [] for episode in range(num_episodes): - obs, _ = self.env.reset() - ep_ret, ep_cost = 0.0, 0.0 + obs, _ = self._env.reset() + ep_ret, ep_cost, length = 0.0, 0.0, 0.0 - for step in range(horizon): + done = False + while not done: with torch.no_grad(): - act = self.actor.predict( + act = self._actor.predict( torch.as_tensor(obs, dtype=torch.float32), - deterministic=True, + deterministic=False, ) - obs, rew, cost, _, _, _ = self.env.step(act) - ep_ret += rew - ep_cost += (cost_criteria**step) * cost - episode_costs.append(ep_cost.numpy().mean()) - episode_rewards.append(ep_ret.numpy().mean()) - episode_lengths.append(step) + obs, rew, cost, terminated, truncated, _ = self._env.step(act) + + ep_ret += rew.item() + ep_cost += (cost_criteria**length) * cost.item() + length += 1 + + done = bool(terminated or truncated) + + episode_rewards.append(ep_ret) + episode_costs.append(ep_cost) + episode_lengths.append(length) + print(f'Episode {episode+1} results:') - print(f'Episode reward: {ep_ret.numpy().mean()}') - print(f'Episode cost: {ep_cost.numpy().mean()}') - print(f'Episode length: {step+1}') + print(f'Episode reward: {ep_ret}') + print(f'Episode cost: {ep_cost}') + print(f'Episode length: {length}') + + print('#' * 50) print('Evaluation results:') print(f'Average episode reward: {np.mean(episode_rewards)}') print(f'Average episode cost: {np.mean(episode_costs)}') @@ -230,7 +243,7 @@ def fps(self) -> int: int: the fps. """ try: - fps = self.env.metadata['render_fps'] + fps = self._env.metadata['render_fps'] except AttributeError: fps = 30 warnings.warn('The fps is not found, use 30 as default.') @@ -250,27 +263,32 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc """ if save_replay_path is None: - save_replay_path = os.path.join(self.save_dir, 'video', self.model_name.split('.')[0]) + save_replay_path = os.path.join(self._save_dir, 'video', self._model_name.split('.')[0]) horizon = 1000 frames = [] - obs, _ = self.env.reset() - if self.render_mode == 'human': - self.env.render() - elif self.render_mode == 'rgb_array': - frames.append(self.env.render()) + obs, _ = self._env.reset() + if self._render_mode == 'human': + self._env.render() + elif self._render_mode == 'rgb_array': + frames.append(self._env.render()) + for episode_idx in range(num_episodes): - for _ in range(horizon): + step = 0 + done = False + while not done and step <= 2000: # a big number to make sure the episode will end with torch.no_grad(): - act = self.actor.predict(obs, deterministic=True) - obs, _, _, done, truncated, _ = self.env.step(act.cpu().squeeze()) - if done[0] or truncated[0]: - break - if self.render_mode == 'rgb_array': - frames.append(self.env.render()) - - if self.render_mode == 'rgb_array_list': - frames = self.env.render() + act = self._actor.predict(obs, deterministic=False) + obs, _, _, terminated, truncated, _ = self._env.step(act) + step += 1 + done = bool(terminated or truncated) + + if self._render_mode == 'rgb_array': + frames.append(self._env.render()) + + if self._render_mode == 'rgb_array_list': + frames = self._env.render() + if save_replay_path is not None: save_video( frames, @@ -281,5 +299,5 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc episode_index=episode_idx, name_prefix='eval', ) - self.env.reset() + self._env.reset() frames = [] From 25f9dd23b3a85e0a4fc2a10f6fc60e818477dc43 Mon Sep 17 00:00:00 2001 From: ruiyang sun Date: Wed, 8 Mar 2023 17:20:17 +0800 Subject: [PATCH 5/7] fix(normalize): fix normalize can't load correctly --- omnisafe/common/normalizer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/omnisafe/common/normalizer.py b/omnisafe/common/normalizer.py index fde7567cb..918a0de99 100644 --- a/omnisafe/common/normalizer.py +++ b/omnisafe/common/normalizer.py @@ -14,7 +14,7 @@ # ============================================================================== """Implementation of Vector Buffer.""" -from typing import Tuple +from typing import Any, Mapping, Tuple import torch import torch.nn as nn @@ -108,3 +108,7 @@ def _push(self, raw_data: torch.Tensor) -> None: self._var = self._sumsq / (self._count - 1) self._std = torch.sqrt(self._var) self._std = torch.max(self._std, 1e-2 * torch.ones_like(self._std)) + + def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True): + self._first = False + return super().load_state_dict(state_dict, strict) From 75ba123510c6eeaff01e539843f7729a89b487f3 Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Thu, 9 Mar 2023 01:37:10 +0800 Subject: [PATCH 6/7] wip --- examples/evaluate_saved_policy.py | 9 +++++---- omnisafe/evaluator.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/examples/evaluate_saved_policy.py b/examples/evaluate_saved_policy.py index 8ef66251c..6fcd8ea45 100644 --- a/examples/evaluate_saved_policy.py +++ b/examples/evaluate_saved_policy.py @@ -20,14 +20,15 @@ # Just fill your experiment's log directory in here. -# Such as: ~/omnisafe/runs/SafetyPointGoal1-v0/CPO/seed-000-2022-12-25_14-45-05 +# Such as: ~/omnisafe/examples/runs/PPOLag-/seed-000-2023-03-07-20-25-48 LOG_DIR = '' - +play = True +save_replay = True if __name__ == '__main__': - evaluator = omnisafe.Evaluator() + evaluator = omnisafe.Evaluator(play=play, save_replay=save_replay) for item in os.scandir(os.path.join(LOG_DIR, 'torch_save')): if item.is_file() and item.name.split('.')[-1] == 'pt': - evaluator.load_saved_model( + evaluator.load_saved( save_dir=LOG_DIR, model_name=item.name, camera_name='track', width=256, height=256 ) evaluator.render(num_episodes=1) diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index cf0e0c1fa..2877033b8 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -60,6 +60,9 @@ def __init__( # set the render mode self._play = play self._save_replay = save_replay + + self._dividing_line = '\n' + '#' * 50 + '\n' + self.__set_render_mode(play, save_replay) def __set_render_mode(self, play: bool = True, save_replay: bool = True): @@ -225,7 +228,7 @@ def evaluate( print(f'Episode cost: {ep_cost}') print(f'Episode length: {length}') - print('#' * 50) + print(self._dividing_line) print('Evaluation results:') print(f'Average episode reward: {np.mean(episode_rewards)}') print(f'Average episode cost: {np.mean(episode_costs)}') @@ -254,6 +257,7 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc self, num_episodes: int = 0, save_replay_path: Optional[str] = None, + max_render_steps: int = 2000, ): """Render the environment for one episode. @@ -276,7 +280,9 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc for episode_idx in range(num_episodes): step = 0 done = False - while not done and step <= 2000: # a big number to make sure the episode will end + while ( + not done and step <= max_render_steps + ): # a big number to make sure the episode will end with torch.no_grad(): act = self._actor.predict(obs, deterministic=False) obs, _, _, terminated, truncated, _ = self._env.step(act) From 53dd79a01794b7d2a51634c7a7db5eee944d8f7d Mon Sep 17 00:00:00 2001 From: Gaiejj <524339208@qq.com> Date: Thu, 9 Mar 2023 11:05:36 +0800 Subject: [PATCH 7/7] refactor: clean the code --- omnisafe/common/normalizer.py | 2 +- omnisafe/evaluator.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/omnisafe/common/normalizer.py b/omnisafe/common/normalizer.py index 918a0de99..52ccff0b6 100644 --- a/omnisafe/common/normalizer.py +++ b/omnisafe/common/normalizer.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementation of Vector Buffer.""" +"""Implementation of Normalizer.""" from typing import Any, Mapping, Tuple diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index 2877033b8..2d060bb92 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -69,7 +69,8 @@ def __set_render_mode(self, play: bool = True, save_replay: bool = True): """Set the render mode. Args: - render_mode (str): render mode. + play (bool): whether to play the video. + save_replay (bool): whether to save the video. """ # set the render mode if play and save_replay: @@ -187,9 +188,7 @@ def evaluate( cost_criteria (float): the cost criteria for the evaluation. Returns: - episode_rewards (list): list of episode rewards. - episode_costs (list): list of episode costs. - episode_lengths (list): list of episode lengths. + (float, float, float): the average return, the average cost, and the average length of the episodes. """ if self._env is None or self._actor is None: raise ValueError(