diff --git a/docs/source/benchmark/on-policy.md b/docs/source/benchmark/on-policy.md index 83b13308..030c75bf 100644 --- a/docs/source/benchmark/on-policy.md +++ b/docs/source/benchmark/on-policy.md @@ -271,7 +271,7 @@ class="smallcaps">SafetyHumanoidVelocity-v1 - ±- 1106.09 ± 607.6 677.43 ± 189.96 -762.73 ± 170.22-v0 +762.73 ± 170.22 SafetySwimmerVelocity-v1 47.31 ± 16.19 27.12 ±7.47 - ±- -113.28 ± 20.22-v0 +113.28 ± 20.22 37.93 ±8.68 273.86 ± 87.76 @@ -287,7 +287,7 @@ class="smallcaps">SafetySwimmerVelocity-v1 SafetyWalker2dVelocity-v1 1665 .00 ± 930.18 -373.63 ± 129.2-v0 +373.63 ± 129.2 - ±- 3806.39 ± 1547.48 3748.26 ± 1832.83 @@ -325,14 +325,14 @@ class="smallcaps">SafetyHalfCheetahVelocity-v1 3430.9 ± 239.38 - ±- 3313.31 ± 1048.78 -4255.73 ± 1053.82-v0 +4255.73 ± 1053.82 7185.06 ± 3650.82-v0 SafetyHopperVelocity-v1 2590.54 ± 631.05 -993.63 ± 489.42-v0 +993.63 ± 489.42 - ±- 2698.19 ± 568.80 1346.94 ± 984.09 @@ -345,7 +345,7 @@ class="smallcaps">SafetyHumanoidVelocity-v1 810.76 ± 270.69 - ±- 1461.51 ± 602.23 -749.42 ± 149.81-v0 +749.42 ± 149.81 2828.18 ± 2256.38 @@ -446,7 +446,7 @@ To demonstrate the high reliability of the algorithms implemented, OmniSafe offe SafetyAntVelocity-v1 5292.29 ± 913.44 -919.42 ± 158.61-v0 +919.42 ± 158.61 5547.20 ± 807.89 895.56 ± 77.13 6026.79 ± 314.98 @@ -460,7 +460,7 @@ class="smallcaps">SafetyHalfCheetahVelocity-v1 5188.46 ± 1202.76 896.55 ± 184.7 5878.28 ± 2012.24 -847.74 ± 249.02-v0 +847.74 ± 249.02 6490.76 ± 2507.18 734.26 ± 321.88 6921.83 ± 1721.79 @@ -484,7 +484,7 @@ class="smallcaps">SafetyHumanoidVelocity-v1 7001.78 ± 419.67 834.11 ± 212.43 8055.20 ± 641.67 -946.40 ± 9.11-v0 +946.40 ± 9.11 8681.24 ± 3934.08 718.42 ± 323.30 9115.93 ± 596.88 @@ -496,7 +496,7 @@ class="smallcaps">SafetySwimmerVelocity-v1 77.05 ±33.44 107.1 ±60.58 120.19 ± 7.74 -161.78 ± 17.51-v0 +161.78 ± 17.51 124.91 ± 6.13 176.56 ± 15.95 119.77 ± 13.8 @@ -508,7 +508,7 @@ class="smallcaps">SafetyWalker2dVelocity-v1 4832.34 ± 685.76 866.59 ± 93.47 5347.35 ± 436.86 -914.74 ± 32.61-v0 +914.74 ± 32.61 6096.67 ± 723.06 914.46 ± 27.85 6239.52 ± 879.99 @@ -521,7 +521,7 @@ class="smallcaps">SafetyCarGoal1-v0 57.46 ±48.34 36.07 ±1.25 58.06 ±10.03 -36.60 ±0.22-v0 +36.60 ±0.22 55.58 ±12.68 33.41 ±2.89 58.06 ±42.06 @@ -541,14 +541,14 @@ class="smallcaps">SafetyCarButton1-v0 SafetyCarGoal2-v0 -29.43 ±4.62-v0 +29.43 ±4.62 179.2 ±84.86 30.26 ±0.38 209.62 ± 29.97 32.17 ±1.24 190.74 ± 21.05 29.88 ±4.55 -194.16 ± 106.2-v0 +194.16 ± 106.2 SafetyCarButton2-v0 313.88 ± 58.20 20.51 ±3.34 316.42 ± 35.28 -21.35 ±8.22-v0 +21.35 ±8.22 312.64 ± 138.4 @@ -589,7 +589,7 @@ class="smallcaps">SafetyPointButton1-v0 SafetyPointGoal2-v0 -25.18 ±3.62-v0 +25.18 ±3.62 204.96 ± 104.97 26.19 ±0.84 193.60 ± 18.54 @@ -608,7 +608,7 @@ class="smallcaps">SafetyPointButton2-v0 28.78 ±2.05 170.30 ± 30.59 25.91 ±6.15 -166.6 ±111.21-v0 +166.6 ±111.21 @@ -634,9 +634,9 @@ class="smallcaps">SafetyPointButton2-v0 SafetyAntVelocity-v1 3139.52 ± 110.34 -12.34 ±3.11-v0 +12.34 ±3.11 3041.89 ± 180.77 -19.52 ±20.21-v0 +19.52 ±20.21 3261.87 ± 80.00 12.05 ±6.57 2636.62 ± 181.09 @@ -671,7 +671,7 @@ class="smallcaps">SafetyHopperVelocity-v1 class="smallcaps">SafetyHumanoidVelocity-v1 6286.51 ± 151.03 19.47 ±7.74 -6551.30 ± 58.42-v0 +6551.30 ± 58.42 59.56 ±117.37 6624.46 ± 25.9 5.87 ±9.46 @@ -681,10 +681,10 @@ class="smallcaps">SafetyHumanoidVelocity-v1 SafetySwimmerVelocity-v1 -61.29 ±18.12-v0 +61.29 ±18.12 22.60 ±1.16 81.18 ±16.33 -22.24 ±3.91-v0 +22.24 ±3.91 64.74 ±17.67 28.02 ±4.09 38.02 ±34.18 @@ -699,56 +699,56 @@ class="smallcaps">SafetyWalker2dVelocity-v1 14.98 ±9.27 2982.27 ± 681.55 13.49 ±14.55 -2713.57 ± 313.2-v0 +2713.57 ± 313.2 20.51 ±14.09 SafetyCarGoal1-v0 -18.71 ±2.72-v0 +18.71 ±2.72 23.10 ±12.57 -27.04 ±1.82-v0 +27.04 ±1.82 26.80 ±5.64 13.27 ±9.26 21.72 ±32.06 --1.10 ±6.851-v0 +-1.10 ±6.851 50.58 ±99.24 SafetyCarButton1-v0 -2.04 ±2.98 -43.48 ±31.52-v0 +43.48 ±31.52 -0.38 ±0.85 -37.54 ±31.72-v0 +37.54 ±31.72 0.33 ±1.96 55.5 ±89.64 --2.06 ±7.2-v0 -43.78 ±98.01-v0 +-2.06 ±7.2 +43.78 ±98.01 SafetyCarGoal2-v0 2.30 ±1.76 -22.90 ±16.22-v0 +22.90 ±16.22 3.65 ±1.09 39.98 ±20.29 1.58 ±2.49 -13.82 ±24.62-v0 --0.07 ±1.62-v0 +13.82 ±24.62 +-0.07 ±1.62 43.86 ±99.58 SafetyCarButton2-v0 --1.35 ±2.41-v0 +-1.35 ±2.41 42.02 ±31.77 -1.68 ±2.55 20.36 ±13.67 -0.76 ±2.52-v0 +0.76 ±2.52 47.86 ±103.27 -0.11 ±0.72-v0 -85.94 ±122.01-v0 +0.11 ±0.72 +85.94 ±122.01 SafetyPointGoal1-v0 22.98 ±8.45 12.96 ±6.95 25.80 ±34.99 -1.6 ±3.01-v0 +1.6 ±3.01 31.1 ±80.03 SafetyPointButton1-v0 3.65 ±4.47 -26.30 ±9.22-v0 +26.30 ±9.22 6.93 ±1.84 31.16 ±20.58 4.60 ±4.73 20.8 ±35.78 -0.34 ±1.53 -52.86 ±85.62-v0 +52.86 ±85.62 SafetyPointGoal2-v0 26.00 ±4.70 1.98 ±3.86 41.20 ±61.03 -0.34 ±2.2-v0 +0.34 ±2.2 65.84 ±195.76 @@ -822,25 +822,25 @@ class="smallcaps">SafetyPointButton2-v0 SafetyAntVelocity-v1 3215.79 ± 346.68 -18.25 ±17.12-v0 +18.25 ±17.12 2257.07 ± 47.97 -10.44 ±5.22-v0 +10.44 ±5.22 3184.48 ± 305.59 14.75 ±6.36 3098.54 ± 78.90 -14.12 ±3.41-v0 +14.12 ±3.41 SafetyHalfCheetahVelocity-v1 2850.6 ± 244.65 4.27 ±4.46 -1677.93 ± 217.31-v0 +1677.93 ± 217.31 19.06 ±15.26 2965.2 ± 290.43 2.37 ±3.5 2786.48 ± 173.45 -4.70 ±6.72-v0 +4.70 ±6.72 SafetyHopperVelocity-v1 1437.75 ± 446.87 10.13 ±8.87 1713.71 ± 18.26 -13.40 ±5.82-v0 +13.40 ±5.82 SafetyHumanoidVelocity-v1 6109.94 ± 497.56 24.69 ±20.54 -5852.25 ± 78.01-v0 +5852.25 ± 78.01 0.24 ±0.48 -6489.39 ± 35.1-v0 +6489.39 ± 35.1 13.86 ±39.33 6465.34 ± 79.87 0.18 ±0.36 @@ -876,7 +876,7 @@ class="smallcaps">SafetySwimmerVelocity-v1 53.87 ±17.9 29.75 ±7.33 65.30 ±43.25 -18.22 ±8.01-v0 +18.22 ±8.01 SafetyWalker2dVelocity-v1 3117.05 ± 53.60 8.78 ±12.38 2074.76 ± 962.45 -21.90 ±9.41-v0 +21.90 ±9.41 SafetyCarGoal1-v0 21.56 ±2.87 38.42 ±8.36 15.23 ±10.76 -31.66 ±93.51-v0 +31.66 ±93.51 25.52 ±2.65 43.32 ±14.35 @@ -906,13 +906,13 @@ class="smallcaps">SafetyCarGoal1-v0 SafetyCarButton1-v0 1.49 ±2.84 -103.24 ± 123.12-v0 +103.24 ± 123.12 0.36 ±0.85 40.52 ±21.25 0.21 ±2.27 31.78 ±47.03 0.82 ±1.60 -37.86 ±27.41-v0 +37.86 ±27.41 SafetyCarGoal2-v0 48.12 ±31.19 2.09 ±4.33 31.56 ±58.93 -3.56 ±0.92-v0 -32.66 ±3.31-v0 +3.56 ±0.92 +32.66 ±3.31 SafetyCarButton2-v0 1.49 ±2.64 173.68 ± 163.77 -0.66 ±0.42-v0 +0.66 ±0.42 49.72 ±36.50 1.14 ±3.18 46.78 ±57.47 @@ -943,9 +943,9 @@ class="smallcaps">SafetyCarButton2-v0 class="smallcaps">SafetyPointGoal1-v0 14.42 ±6.74 19.02 ±20.08 -18.57 ±1.71-v0 +18.57 ±1.71 22.98 ±6.56 -14.97 ±9.01-v0 +14.97 ±9.01 33.72 ±42.24 20.46 ±1.38 28.84 ±7.76 @@ -960,7 +960,7 @@ class="smallcaps">SafetyPointButton1-v0 5.89 ±7.66 38.24 ±42.96 4.04 ±4.54 -40.00 ±4.52-v0 +40.00 ±4.52 SafetyPointGoal2-v0 1.06 ±0.69 51.92 ±47.40 2.21 ±4.15 -37.92 ±111.81-v0 +37.92 ±111.81 2.50 ±1.25 -40.84 ±23.31-v0 +40.84 ±23.31 SafetyPointButton2-v0 1.05 ±1.27 41.14 ±12.35 2.43 ±3.33 -17.92 ±26.1-v0 +17.92 ±26.1 5.09 ±1.83 48.92 ±17.79 @@ -1010,7 +1010,7 @@ class="smallcaps">SafetyPointButton2-v0 SafetyAntVelocity-v1 2978.74 ± 93.65 -16.77 ±0.92-v0 +16.77 ±0.92 2507.65 ± 63.97 8.036 ±0.39 2944.84 ± 60.53 @@ -1028,7 +1028,7 @@ class="smallcaps">SafetyHalfCheetahVelocity-v1 2922.17 ± 24.84 16.14 ±0.14 2737.79 ± 37.53 -16.44 ±0.21-v0 +16.44 ±0.21 SafetyHopperVelocity-v1 1368.28 ± 576.08 10.38 ±4.38 1699.94 ± 24.25 -17.04 ±0.41-v0 +17.04 ±0.41 1608.41 ± 88.23 16.30 ±0.30 @@ -1046,10 +1046,10 @@ class="smallcaps">SafetyHopperVelocity-v1 SafetyHumanoidVelocity-v1 6401.00 ± 32.23 -17.10 ±2.41-v0 +17.10 ±2.41 5759.44 ± 75.73 -15.84 ±1.42-v0 -6401.85 ± 57.62-v0 +15.84 ±1.42 +6401.85 ± 57.62 11.06 ±5.35 6411.32 ± 44.26 13.04 ±2.68 @@ -1060,16 +1060,16 @@ class="smallcaps">SafetySwimmerVelocity-v1 35.61 ±4.37 3.44 ±1.35 34.72 ±1.37 -10.19 ±2.32-v0 +10.19 ±2.32 77.52 ±40.20 -0.98 ±1.91-v0 +0.98 ±1.91 51.39 ±40.09 0.00 ±0.00 SafetyWalker2dVelocity-v1 -2410.89 ± 241.22-v0 +2410.89 ± 241.22 18.88 ±2.38 2548.82 ± 891.65 13.21 ±6.09 @@ -1081,13 +1081,13 @@ class="smallcaps">SafetyWalker2dVelocity-v1 SafetyCarGoal1-v0 -7.12 ±5.41-v0 -21.68 ±29.11-v0 +7.12 ±5.41 +21.68 ±29.11 16.67 ±10.57 23.58 ±26.39 8.45 ±7.16 18.98 ±25.63 -15.08 ±13.41-v0 +15.08 ±13.41 23.22 ±19.80 @@ -1106,12 +1106,12 @@ class="smallcaps">SafetyCarButton1-v0 SafetyCarGoal2-v0 0.90 ±1.20 -19.98 ±10.12-v0 +19.98 ±10.12 1.76 ±5.20 31.50 ±45.50 -1.02 ±1.41-v0 -27.32 ±60.12-v0 -0.93 ±2.21-v0 +1.02 ±1.41 +27.32 ±60.12 +0.93 ±2.21 26.66 ±60.07 @@ -1130,11 +1130,11 @@ class="smallcaps">SafetyCarButton2-v0 SafetyPointGoal1-v0 7.06 ±5.85 -20.04 ±21.91-v0 +20.04 ±21.91 16.18 ±9.55 29.94 ±26.68 8.30 ±6.03 -25.32 ±31.91-v0 +25.32 ±31.91 11.64 ±8.46 30.00 ±27.67 @@ -1142,10 +1142,10 @@ class="smallcaps">SafetyPointGoal1-v0 SafetyPointButton1-v0 -1.47 ±0.98 -22.60 ±13.91-v0 --3.13 ±3.51-v0 +22.60 ±13.91 +-3.13 ±3.51 9.04 ±3.94 --1.97 ±1.41-v0 +-1.97 ±1.41 12.80 ±7.84 -1.36 ±0.37 2.14 ±1.73 @@ -1154,21 +1154,21 @@ class="smallcaps">SafetyPointButton1-v0 SafetyPointGoal2-v0 0.84 ±2.93 -14.06 ±30.21-v0 -1.64 ±4.02-v0 +14.06 ±30.21 +1.64 ±4.02 19.00 ±34.69 -0.56 ±2.52-v0 +0.56 ±2.52 12.36 ±43.39 1.55 ±4.68 -14.90 ±27.82-v0 +14.90 ±27.82 SafetyPointButton2-v0 --1.38 ±0.11-v0 +-1.38 ±0.11 12.00 ±8.60 -2.56 ±0.67 -17.27 ±10.01-v0 +17.27 ±10.01 -1.70 ±0.29 7.90 ±3.30 -1.66 ±0.99 @@ -1202,14 +1202,14 @@ class="smallcaps">SafetyAntVelocity-v1 15.22 ±3.68 2801.53 ± 19.66 0.23 ±0.09 -3052.63 ± 58.41-v0 +3052.63 ± 58.41 0.40 ±0.23 SafetyHalfCheetahVelocity-v1 -2837.89 ± 398.52-v0 -8.06 ±9.62-v0 +2837.89 ± 398.52 +8.06 ±9.62 2796.75 ± 190.84 11.16 ±9.80 2447.25 ± 346.84 @@ -1220,9 +1220,9 @@ class="smallcaps">SafetyHalfCheetahVelocity-v1 SafetyHopperVelocity-v1 -1713.29 ± 10.21-v0 +1713.29 ± 10.21 8.96 ±4.28 -1178.59 ± 646.71-v0 +1178.59 ± 646.71 18.76 ±8.93 1643.39 ± 2.58 0.77 ±0.26 @@ -1233,7 +1233,7 @@ class="smallcaps">SafetyHopperVelocity-v1 SafetyHumanoidVelocity-v1 6579.26 ± 55.70 -3.76 ±3.61-v0 +3.76 ±3.61 6407.95 ± 254.06 7.38 ±11.34 6321.45 ± 35.73 @@ -1246,11 +1246,11 @@ class="smallcaps">SafetyHumanoidVelocity-v1 class="smallcaps">SafetySwimmerVelocity-v1 91.05 ±62.68 19.12 ±8.33 -69.75 ±46.52-v0 +69.75 ±46.52 20.48 ±9.13 33.02 ±7.26 24.23 ±0.54 -39.24 ±5.01-v0 +39.24 ±5.01 23.20 ±0.48 @@ -1268,9 +1268,9 @@ class="smallcaps">SafetyWalker2dVelocity-v1 SafetyCarGoal1-v0 -10.60 ±2.51-v0 +10.60 ±2.51 30.66 ±7.53 -25.49 ±1.31-v0 +25.49 ±1.31 28.92 ±7.66 17.92 ±1.54 21.60 ±0.83 @@ -1283,17 +1283,17 @@ class="smallcaps">SafetyCarButton1-v0 -1.36 ±0.68 14.62 ±9.40 -0.31 ±0.49 -15.24 ±17.01-v0 -4.47 ±1.12-v0 +15.24 ±17.01 +4.47 ±1.12 25.00 ±0.00 -4.34 ±0.72-v0 +4.34 ±0.72 25.00 ±0.00 SafetyCarGoal2-v0 -0.13 ±1.11-v0 -23.50 ±1.22-v0 +0.13 ±1.11 +23.50 ±1.22 1.77 ±1.20 17.43 ±12.13 6.59 ±0.58 @@ -1305,7 +1305,7 @@ class="smallcaps">SafetyCarGoal2-v0 SafetyCarButton2-v0 -1.59 ±0.70 -39.97 ±26.91-v0 +39.97 ±26.91 -2.95 ±4.03 27.90 ±6.37 4.86 ±1.57 @@ -1321,14 +1321,14 @@ class="smallcaps">SafetyPointGoal1-v0 19.24 ±3.94 21.38 ±6.96 16.03 ±8.60 -19.17 ±9.42-v0 +19.17 ±9.42 16.31 ±6.99 22.10 ±6.13 SafetyPointButton1-v0 -1.18 ±1.02-v0 +1.18 ±1.02 29.42 ±12.10 6.40 ±1.43 27.90 ±13.27