diff --git a/omnisafe/adapter/crabs_adapter.py b/omnisafe/adapter/crabs_adapter.py
index bd15a35a8..05c7c24df 100644
--- a/omnisafe/adapter/crabs_adapter.py
+++ b/omnisafe/adapter/crabs_adapter.py
@@ -22,6 +22,7 @@
 from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter
 from omnisafe.common.buffer import VectorOffPolicyBuffer
+from omnisafe.common.control_barrier_function.crabs.models import MeanPolicy
 from omnisafe.common.logger import Logger
 from omnisafe.envs.crabs_env import CRABSEnv
 from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic
@@ -55,6 +56,7 @@ def __init__(  # pylint: disable=too-many-arguments
         """Initialize a instance of :class:`CRABSAdapter`."""
         super().__init__(env_id, num_envs, seed, cfgs)
         self._env: CRABSEnv
+        self._eval_env: CRABSEnv
         self.n_expl_episodes = 0
         self._max_ep_len = self._env.env.spec.max_episode_steps  # type: ignore
         self.horizon = self._max_ep_len
@@ -62,7 +64,7 @@
     def eval_policy(  # pylint: disable=too-many-locals
         self,
         episode: int,
-        agent: ConstraintActorQCritic,
+        agent: ConstraintActorQCritic | MeanPolicy,
         logger: Logger,
     ) -> None:
         """Rollout the environment with deterministic agent action.
@@ -74,13 +76,13 @@
         """
         for _ in range(episode):
             ep_ret, ep_cost, ep_len = 0.0, 0.0, 0
-            obs, _ = self._eval_env.reset()  # type: ignore
+            obs, _ = self._eval_env.reset()
             obs = obs.to(self._device)
             done = False
             while not done:
-                act = agent.step(obs, deterministic=False)
-                obs, reward, cost, terminated, truncated, info = self._eval_env.step(act)  # type: ignore
+                act = agent.step(obs, deterministic=True)
+                obs, reward, cost, terminated, truncated, info = self._eval_env.step(act)
                 obs, reward, cost, terminated, truncated = (
                     torch.as_tensor(x, dtype=torch.float32, device=self._device)
                     for x in (obs, reward, cost, terminated, truncated)
diff --git a/omnisafe/algorithms/off_policy/crabs.py b/omnisafe/algorithms/off_policy/crabs.py
index 5601520ca..c8ad67314 100644
--- a/omnisafe/algorithms/off_policy/crabs.py
+++ b/omnisafe/algorithms/off_policy/crabs.py
@@ -31,12 +31,9 @@
 from omnisafe.common.control_barrier_function.crabs.models import (
     AddGaussianNoise,
     CrabsCore,
-    EnsembleModel,
     ExplorationPolicy,
-    GatedTransitionModel,
     MeanPolicy,
     MultiLayerPerceptron,
-    TransitionModel,
     UniformPolicy,
 )
 from omnisafe.common.control_barrier_function.crabs.optimizers import (
@@ -46,7 +43,11 @@
     SLangevinOptimizer,
     StateBox,
 )
-from omnisafe.common.control_barrier_function.crabs.utils import Normalizer, get_pretrained_model
+from omnisafe.common.control_barrier_function.crabs.utils import (
+    Normalizer,
+    create_model_and_trainer,
+    get_pretrained_model,
+)
 from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic
@@ -115,48 +116,13 @@ def _init_model(self) -> None:
         ).to(self._device)
         self.mean_policy = MeanPolicy(self._actor_critic.actor)

-        if self._cfgs.transition_model_cfgs.type == 'GatedTransitionModel':
-
-            def make_model(i):
-                return GatedTransitionModel(
-                    self.dim_state,
-                    self.normalizer,
-                    [self.dim_state + self.dim_action, 256, 256, 256, 256, self.dim_state * 2],
-                    self._cfgs.transition_model_cfgs.train,
-                    name=f'model-{i}',
-                )
-
-            self.model = EnsembleModel(
-                [make_model(i) for i in range(self._cfgs.transition_model_cfgs.n_ensemble)],
-            ).to(self._device)
-            self.model_trainer = pl.Trainer(
-                max_epochs=0,
-                accelerator='gpu',
-                devices=[int(str(self._device)[-1])],
-                default_root_dir=self._cfgs.logger_cfgs.log_dir,
-            )
-        elif self._cfgs.transition_model_cfgs.type == 'TransitionModel':
-
-            def make_model(i):
-                return TransitionModel(
-                    self.dim_state,
-                    self.normalizer,
-                    [self.dim_state + self.dim_action, 256, 256, 256, 256, self.dim_state * 2],
-                    self._cfgs.transition_model_cfgs.train,
-                    name=f'model-{i}',
-                )
-
-            self.model = EnsembleModel(
-                [make_model(i) for i in range(self._cfgs.transition_model_cfgs.n_ensemble)],
-            ).to(self._device)
-            self.model_trainer = pl.Trainer(
-                max_epochs=0,
-                accelerator='gpu',
-                devices=[int(str(self._device)[-1])],
-                default_root_dir=self._cfgs.logger_cfgs.log_dir,
-            )
-        else:
-            raise AssertionError(f'unknown model type {self._cfgs.transition_model_cfgs.type}')
+        self.model, self.model_trainer = create_model_and_trainer(
+            self._cfgs,
+            self.dim_state,
+            self.dim_action,
+            self.normalizer,
+            self._device,
+        )

     def _init_log(self) -> None:
         super()._init_log()
@@ -167,9 +133,18 @@ def _init_log(self) -> None:
         what_to_save['obs_normalizer'] = self.normalizer
         self._logger.setup_torch_saver(what_to_save)
         self._logger.torch_save()
-        self._logger.register_key('Metrics/RawPolicyEpRet', window_length=50)
-        self._logger.register_key('Metrics/RawPolicyEpCost', window_length=50)
-        self._logger.register_key('Metrics/RawPolicyEpLen', window_length=50)
+        self._logger.register_key(
+            'Metrics/RawPolicyEpRet',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )
+        self._logger.register_key(
+            'Metrics/RawPolicyEpCost',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )
+        self._logger.register_key(
+            'Metrics/RawPolicyEpLen',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )

     def _init(self) -> None:
         """The initialization of the algorithm.
@@ -282,7 +257,7 @@ def learn(self):
             eval_start = time.time()
             self._env.eval_policy(
                 episode=self._cfgs.train_cfgs.raw_policy_episodes,
-                agent=self._actor_critic,
+                agent=self.mean_policy,
                 logger=self._logger,
             )
@@ -330,7 +305,7 @@ def learn(self):
             eval_start = time.time()
             self._env.eval_policy(
                 episode=self._cfgs.train_cfgs.raw_policy_episodes,
-                agent=self.mean_policy,  # type: ignore
+                agent=self.mean_policy,
                 logger=self._logger,
             )
             eval_time += time.time() - eval_start
diff --git a/omnisafe/algorithms/off_policy/ddpg.py b/omnisafe/algorithms/off_policy/ddpg.py
index 850c787b2..517d8c0be 100644
--- a/omnisafe/algorithms/off_policy/ddpg.py
+++ b/omnisafe/algorithms/off_policy/ddpg.py
@@ -197,14 +197,32 @@ def _init_log(self) -> None:
         self._logger.setup_torch_saver(what_to_save)
         self._logger.torch_save()

-        self._logger.register_key('Metrics/EpRet', window_length=50)
-        self._logger.register_key('Metrics/EpCost', window_length=50)
-        self._logger.register_key('Metrics/EpLen', window_length=50)
+        self._logger.register_key(
+            'Metrics/EpRet',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )
+        self._logger.register_key(
+            'Metrics/EpCost',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )
+        self._logger.register_key(
+            'Metrics/EpLen',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )

         if self._cfgs.train_cfgs.eval_episodes > 0:
-            self._logger.register_key('Metrics/TestEpRet', window_length=50)
-            self._logger.register_key('Metrics/TestEpCost', window_length=50)
-            self._logger.register_key('Metrics/TestEpLen', window_length=50)
+            self._logger.register_key(
+                'Metrics/TestEpRet',
+                window_length=self._cfgs.logger_cfgs.window_lens,
+            )
+            self._logger.register_key(
+                'Metrics/TestEpCost',
+                window_length=self._cfgs.logger_cfgs.window_lens,
+            )
+            self._logger.register_key(
+                'Metrics/TestEpLen',
+                window_length=self._cfgs.logger_cfgs.window_lens,
+            )

         self._logger.register_key('Train/Epoch')
         self._logger.register_key('Train/LR')
diff --git a/omnisafe/algorithms/on_policy/base/policy_gradient.py b/omnisafe/algorithms/on_policy/base/policy_gradient.py
index 4c1539178..e0792d6ab 100644
--- a/omnisafe/algorithms/on_policy/base/policy_gradient.py
+++ b/omnisafe/algorithms/on_policy/base/policy_gradient.py
@@ -188,9 +188,18 @@ def _init_log(self) -> None:
         self._logger.setup_torch_saver(what_to_save)
         self._logger.torch_save()

-        self._logger.register_key('Metrics/EpRet', window_length=50)
-        self._logger.register_key('Metrics/EpCost', window_length=50)
-        self._logger.register_key('Metrics/EpLen', window_length=50)
+        self._logger.register_key(
+            'Metrics/EpRet',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )
+        self._logger.register_key(
+            'Metrics/EpCost',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )
+        self._logger.register_key(
+            'Metrics/EpLen',
+            window_length=self._cfgs.logger_cfgs.window_lens,
+        )

         self._logger.register_key('Train/Epoch')
         self._logger.register_key('Train/Entropy')
diff --git a/omnisafe/common/control_barrier_function/crabs/utils.py b/omnisafe/common/control_barrier_function/crabs/utils.py
index 70551c0f5..38c949a87 100644
--- a/omnisafe/common/control_barrier_function/crabs/utils.py
+++ b/omnisafe/common/control_barrier_function/crabs/utils.py
@@ -14,13 +14,22 @@
 # ==============================================================================
 """Utils for CRABS."""
 # pylint: disable=all
+from __future__ import annotations
+
 import os

+import pytorch_lightning as pl
 import requests
 import torch
 import torch.nn as nn
 from torch import load

+from omnisafe.common.control_barrier_function.crabs.models import (
+    EnsembleModel,
+    GatedTransitionModel,
+    TransitionModel,
+)
+

 class Normalizer(nn.Module):
     """Normalizes input data to have zero mean and unit variance.
@@ -119,3 +128,59 @@ def get_pretrained_model(model_path, model_url, device):
         print('Model found locally.')

     return load(model_path, map_location=device)
+
+
+def create_model_and_trainer(cfgs, dim_state, dim_action, normalizer, device):
+    """Create world model and trainer.
+
+    Args:
+        cfgs: Configs.
+        dim_state: Dimension of the state.
+        dim_action: Dimension of the action.
+        normalizer: Observation normalizer.
+        device: Device to load the model.
+
+    Returns:
+        Tuple[nn.Module, pl.Trainer]: World model and trainer.
+    """
+
+    def make_model(i, model_type) -> nn.Module:
+        if model_type == 'GatedTransitionModel':
+            return GatedTransitionModel(
+                dim_state,
+                normalizer,
+                [dim_state + dim_action, 256, 256, 256, 256, dim_state * 2],
+                cfgs.transition_model_cfgs.train,
+                name=f'model-{i}',
+            )
+        if model_type == 'TransitionModel':
+            return TransitionModel(
+                dim_state,
+                normalizer,
+                [dim_state + dim_action, 256, 256, 256, 256, dim_state * 2],
+                cfgs.transition_model_cfgs.train,
+                name=f'model-{i}',
+            )
+        raise AssertionError(f'unknown model type {model_type}')
+
+    model_type = cfgs.transition_model_cfgs.type
+    models = [make_model(i, model_type) for i in range(cfgs.transition_model_cfgs.n_ensemble)]
+
+    model = EnsembleModel(models).to(device)
+
+    devices: list[int] | int
+
+    if str(device).startswith('cuda'):
+        accelerator = 'gpu'
+        devices = [int(str(device)[-1])]
+    else:
+        accelerator = 'cpu'
+        devices = torch.get_num_threads()
+    trainer = pl.Trainer(
+        max_epochs=0,
+        accelerator=accelerator,
+        devices=devices,
+        default_root_dir=cfgs.logger_cfgs.log_dir,
+    )
+
+    return model, trainer
diff --git a/omnisafe/common/offline/dataset.py b/omnisafe/common/offline/dataset.py
index 7c8b56c4c..38070f522 100644
--- a/omnisafe/common/offline/dataset.py
+++ b/omnisafe/common/offline/dataset.py
@@ -126,7 +126,7 @@ def __init__(  # pylint: disable=too-many-branches
             # Load data from local .npz file
             try:
                 data = np.load(dataset_name)
-            except Exception as e:
+            except (ValueError, OSError) as e:
                 raise ValueError(f'Failed to load data from {dataset_name}') from e

         else:
@@ -284,7 +284,7 @@ def __init__(  # pylint: disable=too-many-branches, super-init-not-called
             # Load data from local .npz file
             try:
                 data = np.load(dataset_name)
-            except Exception as e:
+            except (ValueError, OSError) as e:
                 raise ValueError(f'Failed to load data from {dataset_name}') from e

         else:
diff --git a/omnisafe/configs/off-policy/CRABS.yaml b/omnisafe/configs/off-policy/CRABS.yaml
index 0af9c9dbb..d0b33e24c 100644
--- a/omnisafe/configs/off-policy/CRABS.yaml
+++ b/omnisafe/configs/off-policy/CRABS.yaml
@@ -84,7 +84,7 @@ defaults:
     # save logger path
     log_dir: "./runs"
     # save model path
-    window_lens: 10
+    window_lens: 6
   # model configurations
   model_cfgs:
     # weight initialization mode
diff --git a/omnisafe/configs/off-policy/DDPG.yaml b/omnisafe/configs/off-policy/DDPG.yaml
index cff58f0bd..6cc8fe8db 100644
--- a/omnisafe/configs/off-policy/DDPG.yaml
+++ b/omnisafe/configs/off-policy/DDPG.yaml
@@ -81,7 +81,7 @@ defaults:
     # save logger path
     log_dir: "./runs"
     # save model path
-    window_lens: 10
+    window_lens: 50
   # model configurations
   model_cfgs:
     # weight initialization mode
diff --git a/omnisafe/configs/on-policy/PolicyGradient.yaml b/omnisafe/configs/on-policy/PolicyGradient.yaml
index 4f72d15ae..5dcb31e3c 100644
--- a/omnisafe/configs/on-policy/PolicyGradient.yaml
+++ b/omnisafe/configs/on-policy/PolicyGradient.yaml
@@ -87,7 +87,7 @@ defaults:
     # save logger path
     log_dir: "./runs"
     # save model path
-    window_lens: 100
+    window_lens: 50
   # model configurations
   model_cfgs:
     # weight initialization mode
diff --git a/omnisafe/envs/__init__.py b/omnisafe/envs/__init__.py
index df8b94c7c..4d225c61d 100644
--- a/omnisafe/envs/__init__.py
+++ b/omnisafe/envs/__init__.py
@@ -14,8 +14,6 @@
 # ==============================================================================
 """Environment API for OmniSafe."""

-from contextlib import suppress
-
 from omnisafe.envs import classic_control
 from omnisafe.envs.core import CMDP, env_register, make, support_envs
 from omnisafe.envs.crabs_env import CRABSEnv
diff --git a/omnisafe/envs/classic_control/envs_from_crabs.py b/omnisafe/envs/classic_control/envs_from_crabs.py
index 929c03dbb..a3fd3b404 100644
--- a/omnisafe/envs/classic_control/envs_from_crabs.py
+++ b/omnisafe/envs/classic_control/envs_from_crabs.py
@@ -78,18 +78,20 @@ def __init__(
         task='upright',
         random_reset=False,
         violation_penalty=10,
+        **kwargs,
     ) -> None:
         """Initialize the environment."""
         self.threshold = threshold
         self.task = task
         self.random_reset = random_reset
         self.violation_penalty = violation_penalty
-        super().__init__()
+        super().__init__(**kwargs)
         EzPickle.__init__(
             self,
             threshold=threshold,
             task=task,
             random_reset=random_reset,
+            **kwargs,
         )  # deepcopy calls `get_state`

     def reset_model(self):
@@ -156,9 +158,10 @@ def __init__(
         task='swing',
         random_reset=False,
         violation_penalty=10,
+        **kwargs,
     ) -> None:
         """Initialize the environment."""
-        super().__init__(threshold=threshold, task=task)
+        super().__init__(threshold=threshold, task=task, **kwargs)


 class SafeInvertedPendulumMoveEnv(SafeInvertedPendulumEnv):
@@ -170,9 +173,10 @@ def __init__(
         task='move',
         random_reset=False,
         violation_penalty=10,
+        **kwargs,
     ) -> None:
         """Initialize the environment."""
-        super().__init__(threshold=threshold, task=task)
+        super().__init__(threshold=threshold, task=task, **kwargs)


 register(id='SafeInvertedPendulum-v2', entry_point=SafeInvertedPendulumEnv, max_episode_steps=1000)
diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py
index 70352dbb9..8732d6e34 100644
--- a/omnisafe/evaluator.py
+++ b/omnisafe/evaluator.py
@@ -25,6 +25,7 @@
 import torch
 from gymnasium.spaces import Box
 from gymnasium.utils.save_video import save_video
+from torch import nn

 from omnisafe.algorithms.model_based.base.ensemble import EnsembleDynamicsModel
 from omnisafe.algorithms.model_based.planner import (
@@ -36,6 +37,16 @@
     SafeARCPlanner,
 )
 from omnisafe.common import Normalizer
+from omnisafe.common.control_barrier_function.crabs.models import (
+    AddGaussianNoise,
+    CrabsCore,
+    ExplorationPolicy,
+    MeanPolicy,
+    MultiLayerPerceptron,
+)
+from omnisafe.common.control_barrier_function.crabs.optimizers import Barrier
+from omnisafe.common.control_barrier_function.crabs.utils import Normalizer as CRABSNormalizer
+from omnisafe.common.control_barrier_function.crabs.utils import create_model_and_trainer
 from omnisafe.envs.core import CMDP, make
 from omnisafe.envs.wrapper import ActionRepeat, ActionScale, ObsNormalize, TimeLimit
 from omnisafe.models.actor import ActorBuilder
@@ -291,6 +302,55 @@ def __load_model_and_env(
             self._actor = actor_builder.build_actor(actor_type)
             self._actor.load_state_dict(model_params['pi'])

+        if self._cfgs['algo'] in ['CRABS']:
+            self._init_crabs(model_params)
+
+    def _init_crabs(self, model_params: dict) -> None:
+        mean_policy = MeanPolicy(self._actor)
+        assert self._env is not None, 'The environment must be provided or created.'
+        assert self._actor is not None, 'The actor must be provided or created.'
+        assert (
+            self._env.observation_space.shape is not None
+        ), 'The observation space does not exist.'
+        assert self._env.action_space.shape is not None, 'The action space does not exist.'
+        normalizer = CRABSNormalizer(self._env.observation_space.shape[0], clip=1000).to(
+            torch.device('cpu'),
+        )
+        model, _ = create_model_and_trainer(
+            self._cfgs,
+            self._env.observation_space.shape[0],
+            self._env.action_space.shape[0],
+            normalizer,
+            torch.device('cpu'),
+        )
+        s0 = torch.tensor(
+            self._env.reset()[0],
+            device=torch.device('cpu'),
+            dtype=torch.float32,
+        )
+        h = Barrier(
+            nn.Sequential(
+                normalizer,
+                MultiLayerPerceptron([self._env.observation_space.shape[0], 256, 256, 1]),
+            ),
+            # pylint: disable-next=protected-access
+            self._env._env.env.barrier_fn,  # type: ignore
+            s0,
+            self._cfgs.lyapunov,
+        ).to(torch.device('cpu'))
+        h.load_state_dict(model_params['h'])
+        model.load_state_dict(model_params['models'])
+        core = CrabsCore(h, model, mean_policy, self._cfgs.crabs)  # type: ignore
+        self._actor = ExplorationPolicy(
+            AddGaussianNoise(
+                self._actor,  # type: ignore
+                0.0,
+                self._cfgs.algo_cfgs.exploration_noise,
+            ),
+            core,
+        )
+        self._actor.predict = self._actor.step  # type: ignore
+
     # pylint: disable-next=too-many-locals
     def load_saved(
         self,
@@ -374,8 +434,13 @@ def evaluate(
             with torch.no_grad():
                 if self._actor is not None:
                     act = self._actor.predict(
-                        obs,
+                        obs.reshape(
+                            -1,
+                            obs.shape[-1],  # to make sure the shape is (1, obs_dim)
+                        ),
                         deterministic=True,
+                    ).reshape(
+                        -1,  # to make sure the shape is (act_dim,)
                     )
                 elif self._planner is not None:
                     act = self._planner.output_action(
@@ -407,7 +472,7 @@
             episode_costs.append(ep_cost)
             episode_lengths.append(length)
-            print(f'Episode {episode+1} results:')
+            print(f'Episode {episode} results:')
             print(f'Episode reward: {ep_ret}')
             print(f'Episode cost: {ep_cost}')
             print(f'Episode length: {length}')
@@ -497,8 +562,13 @@ def render(  # pylint: disable=too-many-locals,too-many-arguments,too-many-branc
             with torch.no_grad():
                 if self._actor is not None:
                     act = self._actor.predict(
-                        obs,
+                        obs.reshape(
+                            -1,
+                            obs.shape[-1],  # to make sure the shape is (1, obs_dim)
+                        ),
                         deterministic=True,
+                    ).reshape(
+                        -1,  # to make sure the shape is (act_dim,)
                     )
                 elif self._planner is not None:
                     act = self._planner.output_action(
@@ -546,7 +616,7 @@ def render(  # pylint: disable=too-many-locals,too-many-arguments,too-many-branc
             episode_costs.append(ep_cost)
             episode_lengths.append(length)
             with open(result_path, 'a+', encoding='utf-8') as f:
-                print(f'Episode {episode_idx+1} results:', file=f)
+                print(f'Episode {episode_idx} results:', file=f)
                 print(f'Episode reward: {ep_ret}', file=f)
                 print(f'Episode cost: {ep_cost}', file=f)
                 print(f'Episode length: {length}', file=f)
diff --git a/tests/.coveragerc b/tests/.coveragerc
index c6e711965..dc827cc96 100644
--- a/tests/.coveragerc
+++ b/tests/.coveragerc
@@ -13,6 +13,7 @@ omit =
     ../omnisafe/envs/safety_isaac_gym_env.py
     ../omnisafe/utils/isaac_gym_utils.py
    ../omnisafe/envs/meta_drive_env.py
+    ../omnisafe/evaluator.py

 [report]
 exclude_lines =
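Usage note (illustrative, not part of the patch above): with the evaluator changes, a finished CRABS run can be replayed through `omnisafe.Evaluator` like any other algorithm, because `__load_model_and_env` now detects `algo == 'CRABS'` and rebuilds the barrier network, transition-model ensemble, and `ExplorationPolicy` wrapper from the saved `pi`, `h`, and `models` parameters. The sketch below follows the evaluation pattern from OmniSafe's examples; the log directory is a hypothetical placeholder, and the `render_mode`, `save_dir`, `model_name`, and `num_episodes` keywords are assumed from the pre-existing `Evaluator` API rather than introduced by this diff.

import os

import omnisafe

# Hypothetical path to a completed CRABS run; replace with a real log directory.
LOG_DIR = './runs/CRABS-{SafeInvertedPendulum-v2}/seed-000'

evaluator = omnisafe.Evaluator(render_mode='rgb_array')
for item in os.scandir(os.path.join(LOG_DIR, 'torch_save')):
    if item.is_file() and item.name.split('.')[-1] == 'pt':
        # With the patch, loading a CRABS checkpoint wraps the actor in
        # ExplorationPolicy(AddGaussianNoise(actor, 0.0, exploration_noise), CrabsCore(...)),
        # so evaluate() runs the safeguarded policy rather than the raw actor.
        evaluator.load_saved(save_dir=LOG_DIR, model_name=item.name)
        evaluator.evaluate(num_episodes=1)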