From 7401b39720b095b302b63cb5bc22af71d803aa79 Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Fri, 28 Apr 2023 16:50:37 +0200 Subject: [PATCH] [RLlib] Fix double '::' in RLlib release test yaml files. (#34865) --- .../todo_tests_currently_not_covered.yaml | 4 ++-- .../yaml_files/a2c/a2c-breakout-v5.yaml | 2 +- .../a3c/a3c-pongdeterministic-v5.yaml | 2 +- .../apex/apex-breakoutnoframeskip-v5.yaml | 2 +- .../appo/appo-pongnoframeskip-v5.yaml | 2 +- .../yaml_files/bc/bc-halfcheetah-v4.yaml | 2 +- .../yaml_files/cql/cql-halfcheetah-v4.yaml | 2 +- .../yaml_files/ddpg/ddpg-hopper-v4.yaml | 2 +- .../dqn/dqn-breakoutnoframeskip-v5.yaml | 2 +- .../yaml_files/es/es-humanoid-v4.yaml | 2 +- .../impala/impala-breakoutnoframeskip-v5.yaml | 2 +- .../marwil/marwil-halfcheetah-v4.yaml | 2 +- .../ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml | 2 +- .../ppo-breakoutnoframeskip-v5-torch.yaml | 2 +- .../yaml_files/sac/sac-halfcheetah-v4.yaml | 2 +- .../slateq-interest-evolution-recsim-env.yaml | 2 +- .../yaml_files/td3/td3-halfcheetah-v4.yaml | 2 +- .../multi_gpu_learning_tests.yaml | 20 +++++++++---------- ...lti_gpu_with_attention_learning_tests.yaml | 12 +++++------ .../multi_gpu_with_lstm_learning_tests.yaml | 14 ++++++------- 20 files changed, 41 insertions(+), 41 deletions(-) diff --git a/release/rllib_tests/learning_tests/todo_tests_currently_not_covered.yaml b/release/rllib_tests/learning_tests/todo_tests_currently_not_covered.yaml index b8f75379547b..f769c8fd07d5 100644 --- a/release/rllib_tests/learning_tests/todo_tests_currently_not_covered.yaml +++ b/release/rllib_tests/learning_tests/todo_tests_currently_not_covered.yaml @@ -5,7 +5,7 @@ # run: ARS # # Minimum reward and total ts (in given time_total_s) to pass this test. # pass_criteria: -# sampler_results/episode_reward_mean:: 100.0 +# sampler_results/episode_reward_mean: 100.0 # timesteps_total: 2000000 # stop: # time_total_s: 2000 @@ -29,7 +29,7 @@ # run: DDPPO # # Minimum reward and total ts (in given time_total_s) to pass this test. # pass_criteria: -# sampler_results/episode_reward_mean:: 50.0 +# sampler_results/episode_reward_mean: 50.0 # timesteps_total: 10000000 # stop: # time_total_s: 3600 diff --git a/release/rllib_tests/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml index be28e4aee400..c38c9f8fffb0 100644 --- a/release/rllib_tests/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/a2c/a2c-breakout-v5.yaml @@ -3,7 +3,7 @@ a2c-breakoutnoframeskip-v5: run: A2C # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 50.0 + sampler_results/episode_reward_mean: 50.0 timesteps_total: 5000000 stop: time_total_s: 7200 diff --git a/release/rllib_tests/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml index 9918de78a74f..3ea52a704525 100644 --- a/release/rllib_tests/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/a3c/a3c-pongdeterministic-v5.yaml @@ -3,7 +3,7 @@ a3c-pongdeterministic-v5: run: A3C # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 18.0 + sampler_results/episode_reward_mean: 18.0 timesteps_total: 5000000 stop: time_total_s: 3600 diff --git a/release/rllib_tests/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml index e277cb364eda..81c8fdd20e48 100644 --- a/release/rllib_tests/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/apex/apex-breakoutnoframeskip-v5.yaml @@ -3,7 +3,7 @@ apex-breakoutnoframeskip-v5: run: APEX # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 100.0 + sampler_results/episode_reward_mean: 100.0 timesteps_total: 12000000 stop: time_total_s: 7200 diff --git a/release/rllib_tests/learning_tests/yaml_files/appo/appo-pongnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/appo/appo-pongnoframeskip-v5.yaml index 4b25f5e105e9..9b5e5a84f9bc 100644 --- a/release/rllib_tests/learning_tests/yaml_files/appo/appo-pongnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/appo/appo-pongnoframeskip-v5.yaml @@ -3,7 +3,7 @@ appo-pongnoframeskip-v5: run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 18.0 + sampler_results/episode_reward_mean: 18.0 timesteps_total: 5000000 stop: time_total_s: 1800 diff --git a/release/rllib_tests/learning_tests/yaml_files/bc/bc-halfcheetah-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/bc/bc-halfcheetah-v4.yaml index 04c9b7bb6f22..199022e32d99 100644 --- a/release/rllib_tests/learning_tests/yaml_files/bc/bc-halfcheetah-v4.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/bc/bc-halfcheetah-v4.yaml @@ -2,7 +2,7 @@ bc-halfcheetah-v0: env: HalfCheetah-v4 run: BC pass_criteria: - evaluation/sampler_results/episode_reward_mean:: 400.0 + evaluation/sampler_results/episode_reward_mean: 400.0 timesteps_total: 2500000 stop: time_total_s: 1800 diff --git a/release/rllib_tests/learning_tests/yaml_files/cql/cql-halfcheetah-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/cql/cql-halfcheetah-v4.yaml index 55ad047ffc73..32b7299b9f7f 100644 --- a/release/rllib_tests/learning_tests/yaml_files/cql/cql-halfcheetah-v4.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/cql/cql-halfcheetah-v4.yaml @@ -2,7 +2,7 @@ cql-halfcheetah-v4: env: HalfCheetah-v4 run: CQL pass_criteria: - evaluation/sampler_results/episode_reward_mean:: 400.0 + evaluation/sampler_results/episode_reward_mean: 400.0 # Can not check throughput for offline methods. timesteps_total: 5000000 stop: diff --git a/release/rllib_tests/learning_tests/yaml_files/ddpg/ddpg-hopper-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/ddpg/ddpg-hopper-v4.yaml index c40ce18dd007..17149db121b4 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ddpg/ddpg-hopper-v4.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/ddpg/ddpg-hopper-v4.yaml @@ -3,7 +3,7 @@ ddpg-hopper-v4: run: DDPG # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 110.0 + sampler_results/episode_reward_mean: 110.0 timesteps_total: 50000 stop: time_total_s: 1800 diff --git a/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml index 2662838c8611..2da9c8ac89cc 100644 --- a/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/dqn/dqn-breakoutnoframeskip-v5.yaml @@ -3,7 +3,7 @@ dqn-breakoutnoframeskip-v5: run: DQN # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 20.0 + sampler_results/episode_reward_mean: 20.0 timesteps_total: 400000 stop: time_total_s: 7200 diff --git a/release/rllib_tests/learning_tests/yaml_files/es/es-humanoid-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/es/es-humanoid-v4.yaml index 94262af242c8..90825f64217f 100644 --- a/release/rllib_tests/learning_tests/yaml_files/es/es-humanoid-v4.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/es/es-humanoid-v4.yaml @@ -3,7 +3,7 @@ es-humanoid-v4: run: ES # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 100.0 + sampler_results/episode_reward_mean: 100.0 timesteps_total: 5000000 stop: time_total_s: 3600 diff --git a/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml index ef9a408d630d..2a12ca052256 100644 --- a/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/impala/impala-breakoutnoframeskip-v5.yaml @@ -3,7 +3,7 @@ impala-breakoutnoframeskip-v5: run: IMPALA # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 200.0 + sampler_results/episode_reward_mean: 200.0 timesteps_total: 6000000 stop: time_total_s: 2400 diff --git a/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetah-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetah-v4.yaml index 5bfc11256d93..59ff10051cfb 100644 --- a/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetah-v4.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/marwil/marwil-halfcheetah-v4.yaml @@ -3,7 +3,7 @@ marwil-halfcheetah-v4: run: MARWIL pass_criteria: # Can not check throughput for offline methods. - evaluation/sampler_results/episode_reward_mean:: 400.0 + evaluation/sampler_results/episode_reward_mean: 400.0 timesteps_total: 2500000 stop: time_total_s: 1800 diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml b/release/rllib_tests/learning_tests/yaml_files/ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml index 8bb51cd0ff95..175fb47f3ccc 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml @@ -3,7 +3,7 @@ ppo-breakoutnoframeskip-v5: run: PPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 50.0 + sampler_results/episode_reward_mean: 50.0 timesteps_total: 7000000 stop: time_total_s: 3600 diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml b/release/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml index e2b2a43604b8..22e0d3826ee9 100644 --- a/release/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml @@ -3,7 +3,7 @@ ppo-breakoutnoframeskip-v5: run: PPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 50.0 + sampler_results/episode_reward_mean: 50.0 timesteps_total: 7000000 stop: # This is double the time we use for tf because of 2x throughput there. diff --git a/release/rllib_tests/learning_tests/yaml_files/sac/sac-halfcheetah-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/sac/sac-halfcheetah-v4.yaml index dd57dcd79e59..979bda086a3d 100644 --- a/release/rllib_tests/learning_tests/yaml_files/sac/sac-halfcheetah-v4.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/sac/sac-halfcheetah-v4.yaml @@ -3,7 +3,7 @@ sac-halfcheetah-v4: run: SAC # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 500.0 + sampler_results/episode_reward_mean: 500.0 timesteps_total: 400000 stop: time_total_s: 3600 diff --git a/release/rllib_tests/learning_tests/yaml_files/slateq/slateq-interest-evolution-recsim-env.yaml b/release/rllib_tests/learning_tests/yaml_files/slateq/slateq-interest-evolution-recsim-env.yaml index 9a716345d2e0..d7170509d8e1 100644 --- a/release/rllib_tests/learning_tests/yaml_files/slateq/slateq-interest-evolution-recsim-env.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/slateq/slateq-interest-evolution-recsim-env.yaml @@ -2,7 +2,7 @@ slateq-interest-evolution-recsim-env: env: ray.rllib.examples.env.recommender_system_envs_with_recsim.InterestEvolutionRecSimEnv run: SlateQ pass_criteria: - sampler_results/episode_reward_mean:: 160.0 + sampler_results/episode_reward_mean: 160.0 timesteps_total: 300000 stop: time_total_s: 7200 diff --git a/release/rllib_tests/learning_tests/yaml_files/td3/td3-halfcheetah-v4.yaml b/release/rllib_tests/learning_tests/yaml_files/td3/td3-halfcheetah-v4.yaml index a796d28a3ce5..96d4381c7dbe 100644 --- a/release/rllib_tests/learning_tests/yaml_files/td3/td3-halfcheetah-v4.yaml +++ b/release/rllib_tests/learning_tests/yaml_files/td3/td3-halfcheetah-v4.yaml @@ -3,7 +3,7 @@ td3-halfcheetah-v4: run: TD3 # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 400.0 + sampler_results/episode_reward_mean: 400.0 timesteps_total: 1000000 stop: time_total_s: 3600 diff --git a/release/rllib_tests/multi_gpu_learning_tests/multi_gpu_learning_tests.yaml b/release/rllib_tests/multi_gpu_learning_tests/multi_gpu_learning_tests.yaml index 3c4277f49a98..8a312996532a 100644 --- a/release/rllib_tests/multi_gpu_learning_tests/multi_gpu_learning_tests.yaml +++ b/release/rllib_tests/multi_gpu_learning_tests/multi_gpu_learning_tests.yaml @@ -42,7 +42,7 @@ appo-cartpole-v1-vtrace: run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -65,7 +65,7 @@ ddpg-repeat-after-me-env: run: DDPG # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: -50.0 + sampler_results/episode_reward_mean: -50.0 timesteps_total: 8000 stop: time_total_s: 600 @@ -85,7 +85,7 @@ dqn-cartpole-v1: run: DQN # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 50000 stop: time_total_s: 600 @@ -105,7 +105,7 @@ impala-cartpole-v1: run: IMPALA # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -120,7 +120,7 @@ pg-cartpole-v1: run: PG # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 130.0 + sampler_results/episode_reward_mean: 130.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -138,7 +138,7 @@ ppo-cartpole-v1: run: PPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 300000 stop: time_total_s: 600 @@ -161,7 +161,7 @@ sac-repeat-after-me-env: run: SAC # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 40.0 + sampler_results/episode_reward_mean: 40.0 timesteps_total: 4500 stop: time_total_s: 600 @@ -183,7 +183,7 @@ sac-repeat-after-me-env-continuous: run: SAC # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: -50.0 + sampler_results/episode_reward_mean: -50.0 timesteps_total: 4500 stop: time_total_s: 600 @@ -208,7 +208,7 @@ simpleq-cartpole-v1: run: SimpleQ # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 85000 stop: time_total_s: 600 @@ -221,7 +221,7 @@ td3-repeat-after-me-env: run: TD3 # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: -50.0 + sampler_results/episode_reward_mean: -50.0 timesteps_total: 25000 stop: time_total_s: 600 diff --git a/release/rllib_tests/multi_gpu_with_attention_learning_tests/multi_gpu_with_attention_learning_tests.yaml b/release/rllib_tests/multi_gpu_with_attention_learning_tests/multi_gpu_with_attention_learning_tests.yaml index e1109d535fdc..8491f98a81f9 100644 --- a/release/rllib_tests/multi_gpu_with_attention_learning_tests/multi_gpu_with_attention_learning_tests.yaml +++ b/release/rllib_tests/multi_gpu_with_attention_learning_tests/multi_gpu_with_attention_learning_tests.yaml @@ -4,7 +4,7 @@ appo-stateless-cartpole-no-vtrace: run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -38,7 +38,7 @@ appo-stateless-cartpole-vtrace: run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -71,7 +71,7 @@ impala-stateless-cartpole: run: IMPALA # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -98,7 +98,7 @@ pg-stateless-cartpole: run: PG # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 130.0 + sampler_results/episode_reward_mean: 130.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -126,7 +126,7 @@ ppo-stateless-cartpole: run: PPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 200000 stop: time_total_s: 600 @@ -160,7 +160,7 @@ ppo-stateless-cartpole: # run: R2D2 # # Minimum reward and total ts (in given time_total_s) to pass this test. # pass_criteria: -# sampler_results/episode_reward_mean:: 150.0 +# sampler_results/episode_reward_mean: 150.0 # timesteps_total: 130000 # stop: # time_total_s: 1200 diff --git a/release/rllib_tests/multi_gpu_with_lstm_learning_tests/multi_gpu_with_lstm_learning_tests.yaml b/release/rllib_tests/multi_gpu_with_lstm_learning_tests/multi_gpu_with_lstm_learning_tests.yaml index dcd692a1ebcf..911c8ba0e8ef 100644 --- a/release/rllib_tests/multi_gpu_with_lstm_learning_tests/multi_gpu_with_lstm_learning_tests.yaml +++ b/release/rllib_tests/multi_gpu_with_lstm_learning_tests/multi_gpu_with_lstm_learning_tests.yaml @@ -4,7 +4,7 @@ a2c-stateless-cartpole: run: A2C # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -24,7 +24,7 @@ appo-stateless-cartpole-no-vtrace: run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -50,7 +50,7 @@ appo-stateless-cartpole-vtrace: run: APPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -75,7 +75,7 @@ impala-stateless-cartpole: run: IMPALA # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -94,7 +94,7 @@ pg-stateless-cartpole: run: PG # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 130.0 + sampler_results/episode_reward_mean: 130.0 timesteps_total: 500000 stop: time_total_s: 600 @@ -114,7 +114,7 @@ ppo-stateless-cartpole: run: PPO # Minimum reward and total ts (in given time_total_s) to pass this test. pass_criteria: - sampler_results/episode_reward_mean:: 150.0 + sampler_results/episode_reward_mean: 150.0 timesteps_total: 200000 stop: time_total_s: 600 @@ -140,7 +140,7 @@ ppo-stateless-cartpole: # run: R2D2 # # Minimum reward and total ts (in given time_total_s) to pass this test. # pass_criteria: -# sampler_results/episode_reward_mean:: 150.0 +# sampler_results/episode_reward_mean: 150.0 # timesteps_total: 65000 # stop: # time_total_s: 800