Commit

chore: fix code style
Gaiejj committed May 2, 2024
1 parent 0ef30f0 commit 8639136
Showing 7 changed files with 59 additions and 8 deletions.
1 change: 1 addition & 0 deletions omnisafe/common/buffer/offpolicy_buffer.py
@@ -42,6 +42,7 @@ class OffPolicyBuffer(BaseBuffer):
         act_space (OmnisafeSpace): The action space.
         size (int): The size of the buffer.
         batch_size (int): The batch size of the buffer.
+        penalty_coefficient (float, optional): The penalty coefficient. Defaults to 0.0.
         device (torch.device, optional): The device of the buffer. Defaults to
             ``torch.device('cpu')``.
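
The docstring line added above (and its twin in vector_offpolicy_buffer.py below) documents the penalty_coefficient argument of the off-policy buffers. A minimal construction sketch, assuming the constructor keywords match the Args listed above; the spaces and sizes are illustrative placeholders, not values from this commit:

import torch
from gymnasium.spaces import Box

from omnisafe.common.buffer.offpolicy_buffer import OffPolicyBuffer

# Placeholder spaces for illustration only.
obs_space = Box(low=-1.0, high=1.0, shape=(4,))
act_space = Box(low=-1.0, high=1.0, shape=(2,))

buffer = OffPolicyBuffer(
    obs_space=obs_space,
    act_space=act_space,
    size=10000,
    batch_size=256,
    penalty_coefficient=0.0,  # optional; defaults to 0.0 per the docstring
    device=torch.device('cpu'),
)
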
1 change: 1 addition & 0 deletions omnisafe/common/buffer/vector_offpolicy_buffer.py
@@ -38,6 +38,7 @@ class VectorOffPolicyBuffer(OffPolicyBuffer):
         size (int): The size of the buffer.
         batch_size (int): The batch size of the buffer.
         num_envs (int): The number of environments.
+        penalty_coefficient (float, optional): The penalty coefficient. Defaults to 0.0.
         device (torch.device, optional): The device of the buffer. Defaults to
             ``torch.device('cpu')``.
10 changes: 5 additions & 5 deletions omnisafe/configs/off-policy/SACLag.yaml
@@ -288,11 +288,11 @@ SafeMetaDrive:
     out_of_road_penalty: 1.
   # lagrangian configurations
   lagrange_cfgs:
-    # Tolerance of constraint violation
-    cost_limit: 1.0
-    # Initial value of lagrangian multiplier
+    # tolerance of constraint violation
+    cost_limit: 0.0
+    # initial value of lagrangian multiplier
     lagrangian_multiplier_init: 0.01
-    # Learning rate of lagrangian multiplier
+    # learning rate of lagrangian multiplier
     lambda_lr: 0.0001
-    # Type of lagrangian optimizer
+    # type of lagrangian optimizer
     lambda_optimizer: "Adam"
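
The lagrange_cfgs keys above parameterize the standard Lagrangian-relaxation update: the multiplier starts at lagrangian_multiplier_init and is pushed up whenever the observed episodic cost exceeds cost_limit, using an optimizer of type lambda_optimizer with learning rate lambda_lr. A schematic sketch of that update, not the library's exact implementation:

import torch

# Values mirror the config above; the update rule itself is the generic
# gradient-ascent step on the multiplier, shown for illustration.
cost_limit = 0.0
lagrangian_multiplier = torch.nn.Parameter(torch.tensor(0.01))  # lagrangian_multiplier_init
lambda_optimizer = torch.optim.Adam([lagrangian_multiplier], lr=0.0001)  # lambda_lr

def update_lagrange_multiplier(mean_ep_cost: float) -> None:
    # Minimizing -lambda * (J_c - cost_limit) raises lambda when the cost
    # exceeds the limit and lowers it otherwise.
    lambda_optimizer.zero_grad()
    lambda_loss = -lagrangian_multiplier * (mean_ep_cost - cost_limit)
    lambda_loss.backward()
    lambda_optimizer.step()
    with torch.no_grad():
        lagrangian_multiplier.clamp_(min=0.0)  # keep the multiplier non-negative
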
4 changes: 2 additions & 2 deletions omnisafe/configs/on-policy/CPO.yaml
@@ -126,8 +126,6 @@ defaults:
       activation: tanh
       # learning rate
       lr: 0.001
-  # environment specific configurations
-  env_cfgs: null

 SafeMetaDrive:
   # training configurations
@@ -159,11 +157,13 @@ SafeMetaDrive:
   model_cfgs:
     # actor network configurations
     actor:
+      # size of hidden layers
       hidden_sizes: [256, 256]
       # learning rate
       lr: 0.00005
     # critic network configurations
     critic:
+      # size of hidden layers
       hidden_sizes: [256, 256]
       # learning rate
       lr: 0.00005
4 changes: 4 additions & 0 deletions omnisafe/configs/on-policy/PPO.yaml
@@ -308,10 +308,14 @@ SafeMetaDrive:
   model_cfgs:
     # actor network configurations
     actor:
+      # hidden layer sizes
       hidden_sizes: [256, 256]
+      # learning rate
       lr: 0.00005
     # critic network configurations
     critic:
+      # hidden layer sizes
       hidden_sizes: [256, 256]
+      # learning rate
       lr: 0.00005
   # environment specific configurations
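
The SafeMetaDrive blocks in CPO.yaml and PPO.yaml above are per-environment overrides of the algorithm defaults. The same model_cfgs settings can also be overridden at launch time through the omnisafe.Agent API; a sketch under the assumption that the nested keys mirror the YAML, with a placeholder env id and values not taken from this commit:

import omnisafe

custom_cfgs = {
    'model_cfgs': {
        'actor': {
            'hidden_sizes': [256, 256],
            'lr': 0.00005,
        },
        'critic': {
            'hidden_sizes': [256, 256],
            'lr': 0.00005,
        },
    },
}

# Placeholder environment id; any registered OmniSafe task works here.
agent = omnisafe.Agent('PPO', 'SafetyPointGoal1-v0', custom_cfgs=custom_cfgs)
agent.learn()
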
45 changes: 45 additions & 0 deletions omnisafe/configs/on-policy/PPOLag.yaml
@@ -154,6 +154,10 @@ ShadowHandCatchOver2UnderarmSafeFinger:
     use_critic_norm: False
     # reward discount factor
     gamma: 0.96
+    # normalize reward
+    reward_normalize: False
+    # normalize cost
+    cost_normalize: False
     # normalize observation
     obs_normalize: False
   # model configurations
@@ -206,6 +210,47 @@ ShadowHandOverSafeFinger:
       lr: 0.0006

+ShadowHandCatchOver2UnderarmSafeJoint:
+  # training configurations
+  train_cfgs:
+    # number of vectorized environments
+    vector_env_nums: 256
+    # total number of steps to train
+    total_steps: 100000000
+  # algorithm configurations
+  algo_cfgs:
+    # number of steps to update the policy
+    steps_per_epoch: 38400
+    # number of iterations to update the policy
+    update_iters: 8
+    # batch size for each iteration
+    batch_size: 8192
+    # target kl divergence
+    target_kl: 0.016
+    # max gradient norm
+    max_grad_norm: 1.0
+    # use critic norm
+    use_critic_norm: False
+    # reward discount factor
+    gamma: 0.96
+    # normalize reward
+    reward_normalize: False
+    # normalize cost
+    cost_normalize: False
+    # normalize observation
+    obs_normalize: False
+  # model configurations
+  model_cfgs:
+    # actor network configurations
+    actor:
+      # hidden layer sizes
+      hidden_sizes: [1024, 1024, 512]
+    critic:
+      # hidden layer sizes
+      hidden_sizes: [1024, 1024, 512]
+      # learning rate
+      lr: 0.0006
+
 ShadowHandOverSafeJoint:
   # training configurations
   train_cfgs:
     # number of vectorized environments
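
The reward_normalize, cost_normalize, and obs_normalize flags added above toggle running-statistics normalization of the corresponding signals. As a rough illustration of what such a flag typically controls, here is a generic running mean/std normalizer; it is not OmniSafe's exact wrapper:

import numpy as np

class RunningNormalizer:
    """Generic running mean/std tracker of the kind an obs_normalize flag enables."""

    def __init__(self, shape, epsilon: float = 1e-8):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = epsilon

    def update(self, batch: np.ndarray) -> None:
        # Parallel-variance (Welford-style) update from a batch of samples.
        batch_mean = batch.mean(axis=0)
        batch_var = batch.var(axis=0)
        batch_count = batch.shape[0]
        delta = batch_mean - self.mean
        total = self.count + batch_count
        self.mean = self.mean + delta * batch_count / total
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        self.var = (m_a + m_b + delta**2 * self.count * batch_count / total) / total
        self.count = total

    def normalize(self, x: np.ndarray) -> np.ndarray:
        return (x - self.mean) / np.sqrt(self.var + 1e-8)
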
2 changes: 1 addition & 1 deletion omnisafe/envs/meta_drive_env.py
@@ -1,4 +1,4 @@
-# Copyright 2023 OmniSafe Team. All Rights Reserved.
+# Copyright 2024 OmniSafe Team. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
