Commit

chore: fix code style
Gaiejj committed May 2, 2024
1 parent 0ef30f0 commit 8639136
Showing 7 changed files with 59 additions and 8 deletions.
1 change: 1 addition & 0 deletions omnisafe/common/buffer/offpolicy_buffer.py
@@ -42,6 +42,7 @@ class OffPolicyBuffer(BaseBuffer):
         act_space (OmnisafeSpace): The action space.
         size (int): The size of the buffer.
         batch_size (int): The batch size of the buffer.
+        penalty_coefficient (float, optional): The penalty coefficient. Defaults to 0.0.
         device (torch.device, optional): The device of the buffer. Defaults to
             ``torch.device('cpu')``.
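
The docstring line added above (and its twin in vector_offpolicy_buffer.py below) documents the penalty_coefficient argument of the off-policy buffers. A minimal construction sketch, assuming the constructor keywords match the Args listed above; the spaces and sizes are illustrative placeholders, not values from this commit:

import torch
from gymnasium.spaces import Box

from omnisafe.common.buffer.offpolicy_buffer import OffPolicyBuffer

# Placeholder spaces for illustration only.
obs_space = Box(low=-1.0, high=1.0, shape=(4,))
act_space = Box(low=-1.0, high=1.0, shape=(2,))

buffer = OffPolicyBuffer(
    obs_space=obs_space,
    act_space=act_space,
    size=10000,
    batch_size=256,
    penalty_coefficient=0.0,  # optional; defaults to 0.0 per the docstring
    device=torch.device('cpu'),
)
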
1 change: 1 addition & 0 deletions omnisafe/common/buffer/vector_offpolicy_buffer.py
@@ -38,6 +38,7 @@ class VectorOffPolicyBuffer(OffPolicyBuffer):
         size (int): The size of the buffer.
         batch_size (int): The batch size of the buffer.
         num_envs (int): The number of environments.
+        penalty_coefficient (float, optional): The penalty coefficient. Defaults to 0.0.
         device (torch.device, optional): The device of the buffer. Defaults to
             ``torch.device('cpu')``.
10 changes: 5 additions & 5 deletions omnisafe/configs/off-policy/SACLag.yaml
@@ -288,11 +288,11 @@ SafeMetaDrive:
     out_of_road_penalty: 1.
   # lagrangian configurations
   lagrange_cfgs:
-    # Tolerance of constraint violation
-    cost_limit: 1.0
-    # Initial value of lagrangian multiplier
+    # tolerance of constraint violation
+    cost_limit: 0.0
+    # initial value of lagrangian multiplier
     lagrangian_multiplier_init: 0.01
-    # Learning rate of lagrangian multiplier
+    # learning rate of lagrangian multiplier
     lambda_lr: 0.0001
-    # Type of lagrangian optimizer
+    # type of lagrangian optimizer
     lambda_optimizer: "Adam"
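
The lagrange_cfgs keys above parameterize the standard Lagrangian-relaxation update: the multiplier starts at lagrangian_multiplier_init and is pushed up whenever the observed episodic cost exceeds cost_limit, using an optimizer of type lambda_optimizer with learning rate lambda_lr. A schematic sketch of that update, not the library's exact implementation:

import torch

# Values mirror the config above; the update rule itself is the generic
# gradient-ascent step on the multiplier, shown for illustration.
cost_limit = 0.0
lagrangian_multiplier = torch.nn.Parameter(torch.tensor(0.01))  # lagrangian_multiplier_init
lambda_optimizer = torch.optim.Adam([lagrangian_multiplier], lr=0.0001)  # lambda_lr

def update_lagrange_multiplier(mean_ep_cost: float) -> None:
    # Minimizing -lambda * (J_c - cost_limit) raises lambda when the cost
    # exceeds the limit and lowers it otherwise.
    lambda_optimizer.zero_grad()
    lambda_loss = -lagrangian_multiplier * (mean_ep_cost - cost_limit)
    lambda_loss.backward()
    lambda_optimizer.step()
    with torch.no_grad():
        lagrangian_multiplier.clamp_(min=0.0)  # keep the multiplier non-negative
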
4 changes: 2 additions & 2 deletions omnisafe/configs/on-policy/CPO.yaml
@@ -126,8 +126,6 @@ defaults:
       activation: tanh
       # learning rate
       lr: 0.001
-  # environment specific configurations
-  env_cfgs: null

 SafeMetaDrive:
   # training configurations
@@ -159,11 +157,13 @@ SafeMetaDrive:
   model_cfgs:
     # actor network configurations
     actor:
+      # size of hidden layers
       hidden_sizes: [256, 256]
       # learning rate
       lr: 0.00005
     # critic network configurations
     critic:
+      # size of hidden layers
       hidden_sizes: [256, 256]
       # learning rate
       lr: 0.00005
4 changes: 4 additions & 0 deletions omnisafe/configs/on-policy/PPO.yaml
@@ -308,10 +308,14 @@ SafeMetaDrive:
   model_cfgs:
     # actor network configurations
     actor:
+      # hidden layer sizes
       hidden_sizes: [256, 256]
+      # learning rate
       lr: 0.00005
     # critic network configurations
     critic:
+      # hidden layer sizes
       hidden_sizes: [256, 256]
+      # learning rate
       lr: 0.00005
   # environment specific configurations
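
The SafeMetaDrive blocks in CPO.yaml and PPO.yaml above are per-environment overrides of the algorithm defaults. The same model_cfgs settings can also be overridden at launch time through the omnisafe.Agent API; a sketch under the assumption that the nested keys mirror the YAML, with a placeholder env id and values not taken from this commit:

import omnisafe

custom_cfgs = {
    'model_cfgs': {
        'actor': {
            'hidden_sizes': [256, 256],
            'lr': 0.00005,
        },
        'critic': {
            'hidden_sizes': [256, 256],
            'lr': 0.00005,
        },
    },
}

# Placeholder environment id; any registered OmniSafe task works here.
agent = omnisafe.Agent('PPO', 'SafetyPointGoal1-v0', custom_cfgs=custom_cfgs)
agent.learn()
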
45 changes: 45 additions & 0 deletions omnisafe/configs/on-policy/PPOLag.yaml
@@ -154,6 +154,10 @@ ShadowHandCatchOver2UnderarmSafeFinger:
     use_critic_norm: False
     # reward discount factor
     gamma: 0.96
+    # normalize reward
+    reward_normalize: False
+    # normalize cost
+    cost_normalize: False
     # normalize observation
     obs_normalize: False
   # model configurations
@@ -206,6 +210,47 @@ ShadowHandOverSafeFinger:
       lr: 0.0006

+ShadowHandCatchOver2UnderarmSafeJoint:
+  # training configurations
+  train_cfgs:
+    # number of vectorized environments
+    vector_env_nums: 256
+    # total number of steps to train
+    total_steps: 100000000
+  # algorithm configurations
+  algo_cfgs:
+    # number of steps to update the policy
+    steps_per_epoch: 38400
+    # number of iterations to update the policy
+    update_iters: 8
+    # batch size for each iteration
+    batch_size: 8192
+    # target kl divergence
+    target_kl: 0.016
+    # max gradient norm
+    max_grad_norm: 1.0
+    # use critic norm
+    use_critic_norm: False
+    # reward discount factor
+    gamma: 0.96
+    # normalize reward
+    reward_normalize: False
+    # normalize cost
+    cost_normalize: False
+    # normalize observation
+    obs_normalize: False
+  # model configurations
+  model_cfgs:
+    # actor network configurations
+    actor:
+      # hidden layer sizes
+      hidden_sizes: [1024, 1024, 512]
+    critic:
+      # hidden layer sizes
+      hidden_sizes: [1024, 1024, 512]
+      # learning rate
+      lr: 0.0006
+
 ShadowHandOverSafeJoint:
   # training configurations
   train_cfgs:
     # number of vectorized environments
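
The reward_normalize, cost_normalize, and obs_normalize flags added above toggle running-statistics normalization of the corresponding signals. As a rough illustration of what such a flag typically controls, here is a generic running mean/std normalizer; it is not OmniSafe's exact wrapper:

import numpy as np

class RunningNormalizer:
    """Generic running mean/std tracker of the kind an obs_normalize flag enables."""

    def __init__(self, shape, epsilon: float = 1e-8):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = epsilon

    def update(self, batch: np.ndarray) -> None:
        # Parallel-variance (Welford-style) update from a batch of samples.
        batch_mean = batch.mean(axis=0)
        batch_var = batch.var(axis=0)
        batch_count = batch.shape[0]
        delta = batch_mean - self.mean
        total = self.count + batch_count
        self.mean = self.mean + delta * batch_count / total
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        self.var = (m_a + m_b + delta**2 * self.count * batch_count / total) / total
        self.count = total

    def normalize(self, x: np.ndarray) -> np.ndarray:
        return (x - self.mean) / np.sqrt(self.var + 1e-8)
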
2 changes: 1 addition & 1 deletion omnisafe/envs/meta_drive_env.py
@@ -1,4 +1,4 @@
-# Copyright 2023 OmniSafe Team. All Rights Reserved.
+# Copyright 2024 OmniSafe Team. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
