
Commit

chore: update yaml (#93)
Gaiejj authored and zmsn-2077 committed Feb 5, 2023
1 parent f0b2324 commit cc6f4c9
Showing 1 changed file with 0 additions and 133 deletions.
133 changes: 0 additions & 133 deletions omnisafe/configs/on-policy/PPO.yaml
@@ -286,136 +286,3 @@ SafetyRacecarGoal1-v0:
    max_len: 100
    # The number of threads used to sample data
    num_threads: 20


SafetyHumanoidVelocity-v4:
  # --------------------------------------Basic Configurations----------------------------------- #
  ## -----------------------------Basic configurations for base class PG------------------------ ##
  # The random seed
  seed: 0
  # The torch device
  device: cpu
  # The torch device id
  device_id: 0
  # The environment wrapper type
  wrapper_type: CMDPWrapper
  # Number of epochs
  epochs: 500
  # Number of steps per epoch
  steps_per_epoch: 32768
  # Number of update iterations for the Actor network
  actor_iters: 40
  # Number of update iterations for the Critic network
  critic_iters: 40
  # Check whether all models have the same parameter values every `check_freq` epochs
  check_freq: 25
  # Save the model to disk every `save_freq` epochs
  save_freq: 50
  # Entropy coefficient for the PPO loss
  entropy_coef: 0.0
  # The maximum length of an episode
  max_ep_len: 1000
  # The number of mini-batches
  num_mini_batches: 64
  # The learning rate of the Actor network
  actor_lr: 0.0003
  # The learning rate of the Critic network
  critic_lr: 0.0003
  # The directory for saving training process data
  data_dir: "./runs"
  ## ---------------------------Basic configurations for derived class PPO---------------------- ##
  # The threshold for KL-divergence early stopping
  target_kl: 0.02
  # The batch size for policy updates
  batch_size: 10000
  # The clip range for the PPO loss
  clip: 0.2
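
For reference, `clip` is the ε in PPO's clipped surrogate objective. With the probability ratio r_t(θ) = π_θ(a_t|s_t) / π_θ_old(a_t|s_t) and advantage estimate Â_t, the standard form from the PPO paper is:

    L^{CLIP}(\theta) = \mathbb{E}_t\left[\min\left(r_t(\theta)\,\hat{A}_t,\ \operatorname{clip}\left(r_t(\theta),\ 1-\epsilon,\ 1+\epsilon\right)\hat{A}_t\right)\right], \qquad \epsilon = 0.2

Since `kl_early_stopping` is enabled below, updates on a batch also stop early once the sampled KL divergence between the old and new policy exceeds `target_kl` (0.02 here).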

  # ---------------------------------------Optional Configuration-------------------------------- #
  ## -----------------------------------Configuration For Cost Critic--------------------------- ##
  # Whether to use the cost critic
  use_cost: False
  # The cost discount factor
  cost_gamma: 1.0
  # Whether to linearly decay the learning rate
  linear_lr_decay: True
  # Whether to anneal the exploration noise
  exploration_noise_anneal: False
  # The coefficient of the reward penalty
  penalty_param: 0.0
  # Whether to use KL early stopping
  kl_early_stopping: True
  # Whether to clip gradients by the maximum gradient norm
  use_max_grad_norm: True
  # The threshold of the maximum gradient norm
  max_grad_norm: 40
  # Whether to use critic network norm regularization
  use_critic_norm: True
  # The norm coefficient of the critic network
  critic_norm_coeff: 0.001
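
A note on the last two keys: "critic network norm" in on-policy implementations of this kind is usually an L2 penalty on the critic's parameters added to the value loss. A plausible reading of these options (an assumption, not confirmed by this diff alone) is:

    L_V(\phi) = \mathbb{E}_t\left[\left(V_\phi(s_t) - \hat{R}_t\right)^2\right] + c \sum_i \phi_i^2, \qquad c = \texttt{critic\_norm\_coeff} = 0.001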
  ## ---------------------------------------Configuration For Model----------------------------- ##
  model_cfgs:
    # Whether to share the weights of the Actor network with the Critic network
    shared_weights: False
    # The mode used to initialize the network weights, chosen from "kaiming_uniform", "xavier_normal", "glorot" and "orthogonal"
    weight_initialization_mode: "kaiming_uniform"
    # Type of Actor, chosen from "gaussian_annealing", "gaussian_std_net_actor", "gaussian_learning_actor", "categorical_actor"
    actor_type: gaussian
    # Configuration of the Actor and Critic networks
    ac_kwargs:
      # Configuration of the Actor network
      pi:
        # Sizes of hidden layers
        hidden_sizes: [64, 64]
        # Activation function
        activation: tanh
        # Output activation function
        output_activation: identity
        # Whether to scale the action
        scale_action: False
        # Whether to clip the action
        clip_action: False
        # Whether to learn the standard deviation of the Gaussian noise
        std_learning: True
        # The initial value of the standard deviation of the Gaussian noise
        std_init: 1.0
      # Configuration of the Critic network
      val:
        # Number of critic networks
        num_critics: 1
        # Sizes of hidden layers
        hidden_sizes: [64, 64]
        # Activation function
        activation: tanh
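
Taken together, `actor_type: gaussian`, `std_learning: True`, and `std_init: 1.0` describe a diagonal Gaussian policy, as is conventional in continuous-control PPO: the two 64-unit tanh layers output the action mean, while the standard deviation is a separately learned parameter initialized at 1.0:

    a_t \sim \mathcal{N}\left(\mu_\theta(s_t),\ \sigma^2 I\right), \qquad \sigma_{\mathrm{init}} = 1.0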
  ## --------------------------------------Configuration For Buffer----------------------------- ##
  buffer_cfgs:
    # The reward discount factor
    gamma: 0.99
    # Parameter used to estimate future rewards in GAE
    lam: 0.95
    # Parameter used to estimate future costs in GAE
    lam_c: 0.95
    # Method used to estimate the reward/cost advantage, chosen from "gae", "plain", "vtrace"
    adv_estimation_method: gae
    # Whether to standardize the reward advantage estimate
    standardized_rew_adv: True
    # Whether to standardize the cost advantage estimate
    standardized_cost_adv: True
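
`gamma`, `lam`, and `lam_c` instantiate generalized advantage estimation (GAE). With the TD residual δ_t = r_t + γV(s_{t+1}) − V(s_t), the reward advantage over a trajectory of length T is:

    \hat{A}_t = \sum_{l=0}^{T-t-1} (\gamma\lambda)^l\, \delta_{t+l}, \qquad \gamma = 0.99,\ \lambda = 0.95

and `lam_c` plays the same role as λ when estimating cost advantages against the cost critic.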
  ## --------------------------------------Configuration For Environment------------------------ ##
  env_cfgs:
    # The seed of the environment
    env_seed: 0
    # The number of parallel environments
    num_envs: 8
    # Whether to use asynchronous environments
    async_env: True
    # Whether to normalize the reward
    normalized_rew: True
    # Whether to normalize the cost
    normalized_cost: False
    # Whether to normalize the observations
    normalized_obs: False
    # The maximum length of the record queue
    max_len: 100
    # The number of threads used to sample data
    num_threads: 20
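
For context on how a block like the one deleted above is consumed: omnisafe resolves per-algorithm defaults from PPO.yaml by environment id when an agent is constructed. A minimal sketch following the usage pattern in the project README; the exact `custom_cfgs` override shape is an assumption and may vary across versions:

    import omnisafe

    # The environment id selects the matching defaults in the algorithm's YAML.
    env_id = 'SafetyHumanoidVelocity-v4'

    # Hypothetical override: individual YAML keys can be replaced at run time
    # instead of editing the config file.
    custom_cfgs = {'seed': 0}

    agent = omnisafe.Agent('PPO', env_id, custom_cfgs=custom_cfgs)
    agent.learn()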
