From cc6f4c9736777aa1a6e4ffbee07c6efce7879e60 Mon Sep 17 00:00:00 2001
From: Jiayi Zhou <108712610+Gaiejj@users.noreply.github.com>
Date: Wed, 1 Feb 2023 23:55:48 +0800
Subject: [PATCH] chore: update yaml (#93)

---
 omnisafe/configs/on-policy/PPO.yaml | 133 ----------------------------
 1 file changed, 133 deletions(-)

diff --git a/omnisafe/configs/on-policy/PPO.yaml b/omnisafe/configs/on-policy/PPO.yaml
index 83b6681c0..fa736071d 100644
--- a/omnisafe/configs/on-policy/PPO.yaml
+++ b/omnisafe/configs/on-policy/PPO.yaml
@@ -286,136 +286,3 @@ SafetyRacecarGoal1-v0:
     max_len: 100
     # The number of threads used to sample data
     num_threads: 20
-
-
-SafetyHumanoidVelocity-v4:
-  # --------------------------------------Basic Configurations----------------------------------- #
-  ## -----------------------------Basic configurations for base class PG------------------------ ##
-  # The random seed
-  seed: 0
-  # The torch device
-  device: cpu
-  # The torch device id
-  device_id: 0
-  # The environment wrapper type
-  wrapper_type: CMDPWrapper
-  # Number of epochs
-  epochs: 500
-  # Number of steps per epoch
-  steps_per_epoch: 32768
-  # Number of update iteration for Actor network
-  actor_iters: 40
-  # Number of update iteration for Critic network
-  critic_iters: 40
-  # Check if all models own the same parameter values every `check_freq` epoch
-  check_freq: 25
-  # Save model to disk every `check_freq` epochs
-  save_freq: 50
-  # Entropy coefficient for PPO loss
-  entropy_coef: 0.0
-  # The max length of per epoch
-  max_ep_len: 1000
-  # The size of mini batch
-  num_mini_batches: 64
-  # The learning rate of Actor network
-  actor_lr: 0.0003
-  # The learning rate of Critic network
-  critic_lr: 0.0003
-  # The Address for saving training process data
-  data_dir: "./runs"
-  ## ---------------------------Basic configurations for derived class PPO---------------------- ##
-  # The thereshold for KL early stopping
-  target_kl: 0.02
-  # The size of batch for policy update
-  batch_size: 10000
-  # The clip range for PPO loss
-  clip: 0.2
-
-  # ---------------------------------------Optional Configuration-------------------------------- #
-  ## -----------------------------------Configuration For Cost Critic--------------------------- ##
-  # Whether to use cost critic
-  use_cost: False
-  # Cost discounted factor
-  cost_gamma: 1.0
-  # Whether to use linear decay of learning rate
-  linear_lr_decay: True
-  # Whether to use exploration noise anneal
-  exploration_noise_anneal: False
-  # The coefficient of reward penalty
-  penalty_param: 0.0
-  # Whether to use KL early stopping
-  kl_early_stopping: True
-  # Whether to use max gradient norm
-  use_max_grad_norm: True
-  # The thereshold of max gradient norm
-  max_grad_norm: 40
-  # Whether to use critic network norm
-  use_critic_norm: True
-  # The norm coefficient of critic network
-  critic_norm_coeff: 0.001
-  ## ---------------------------------------Configuration For Model----------------------------- ##
-  model_cfgs:
-    # Whether to share the weight of Actor network with Critic network
-    shared_weights: False
-    # The mode to initiate the weight of network, choosing from "kaiming_uniform", "xavier_normal", "glorot" and "orthogonal".
-    weight_initialization_mode: "kaiming_uniform"
-    # Type of Actor, choosing from "gaussian_annealing", "gaussian_std_net_actor", "gaussian_learning_actor", "categorical_actor"
-    actor_type: gaussian
-    # Configuration of Actor and Critic network
-    ac_kwargs:
-      # Configuration of Actor network
-      pi:
-        # Size of hidden layers
-        hidden_sizes: [64, 64]
-        # Activation function
-        activation: tanh
-        # Output activation function
-        output_activation: identity
-        # Whether to scale action.
-        scale_action: False
-        # Whether to clip action.
-        clip_action: False
-        # Whther to learn the standard deviation of Gaussian noise
-        std_learning: True
-        # The initial value of standard deviation of Gaussian noise
-        std_init: 1.0
-      # Configuration of Critic network
-      val:
-        # Number of critic networks
-        num_critics: 1
-        # Size of hidden layers
-        hidden_sizes: [64, 64]
-        # Activation function
-        activation: tanh
-  ## --------------------------------------Configuration For Buffer----------------------------- ##
-  buffer_cfgs:
-    # Reward discounted factor
-    gamma: 0.99
-    # Parameters used to estimate future rewards in GAE
-    lam: 0.95
-    # Parameters used to estimate future costs in GAE
-    lam_c: 0.95
-    # Method to estimate the advantage reward/cost, choosing from "gae", "plain", "vtrace"
-    adv_estimation_method: gae
-    # Whether to use reward standardized advantage estimation
-    standardized_rew_adv: True
-    # Whether to use cost standardized advantage estimation
-    standardized_cost_adv: True
-  ## --------------------------------------Configuration For Environment------------------------ ##
-  env_cfgs:
-    # The seed of environment
-    env_seed: 0
-    # The number of parallel environments
-    num_envs: 8
-    # Whether to use async environment
-    async_env: True
-    # Whether to use standardized reward
-    normalized_rew: True
-    # Whether to use standardized cost
-    normalized_cost: False
-    # Whether to use standardized obs
-    normalized_obs: False
-    # The maximum length of record queue
-    max_len: 100
-    # The number of threads used to sample data
-    num_threads: 20