From 6ff8b60628b30bab91af568ddfec1573078db5aa Mon Sep 17 00:00:00 2001 From: sven1977 Date: Thu, 18 Apr 2024 15:14:00 +0200 Subject: [PATCH 1/3] wip Signed-off-by: sven1977 --- doc/source/rllib/rllib-examples.rst | 2 +- rllib/BUILD | 54 +++++++++---------- ...v2_frame_stacking.py => frame_stacking.py} | 0 ...std_filtering.py => mean_std_filtering.py} | 16 +++--- ...tion_spaces.py => nested_action_spaces.py} | 0 ...spaces.py => nested_observation_spaces.py} | 0 ...ewards.py => prev_actions_prev_rewards.py} | 0 rllib/examples/nested_action_spaces.py | 2 +- 8 files changed, 37 insertions(+), 37 deletions(-) rename rllib/examples/connectors/{connector_v2_frame_stacking.py => frame_stacking.py} (100%) rename rllib/examples/connectors/{connector_v2_mean_std_filtering.py => mean_std_filtering.py} (91%) rename rllib/examples/connectors/{connector_v2_nested_action_spaces.py => nested_action_spaces.py} (100%) rename rllib/examples/connectors/{connector_v2_nested_observation_spaces.py => nested_observation_spaces.py} (100%) rename rllib/examples/connectors/{connector_v2_prev_actions_prev_rewards.py => prev_actions_prev_rewards.py} (100%) diff --git a/doc/source/rllib/rllib-examples.rst b/doc/source/rllib/rllib-examples.rst index 31d1c90bf1a7..c9662741299e 100644 --- a/doc/source/rllib/rllib-examples.rst +++ b/doc/source/rllib/rllib-examples.rst @@ -125,7 +125,7 @@ Multi-Agent and Hierarchical Special Action- and Observation Spaces -------------------------------------- -- `Nested action spaces `__: +- `Nested action spaces `__: Learning in arbitrarily nested action spaces. - `Parametric actions `__: Example of how to handle variable-length or parametric action spaces. diff --git a/rllib/BUILD b/rllib/BUILD index 1f3316bb4e85..da5e0fafeb8a 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -2155,87 +2155,87 @@ py_test( # .................................... # Framestacking examples only run in smoke-test mode (a few iters only). py_test( - name = "examples/connectors/connector_v2_frame_stacking_ppo", - main = "examples/connectors/connector_v2_frame_stacking.py", + name = "examples/connectors/frame_stacking_ppo", + main = "examples/connectors/frame_stacking.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", - srcs = ["examples/connectors/connector_v2_frame_stacking.py"], + srcs = ["examples/connectors/frame_stacking.py"], args = ["--enable-new-api-stack", "--stop-iter=2", "--framework=torch", "--algo=PPO"] ) py_test( - name = "examples/connectors/connector_v2_frame_stacking_multi_agent_ppo", - main = "examples/connectors/connector_v2_frame_stacking.py", + name = "examples/connectors/frame_stacking_multi_agent_ppo", + main = "examples/connectors/frame_stacking.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", - srcs = ["examples/connectors/connector_v2_frame_stacking.py"], + srcs = ["examples/connectors/frame_stacking.py"], args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iter=2", "--framework=torch", "--algo=PPO", "--num-env-runners=4", "--num-cpus=6"] ) # Nested action spaces (flattening obs and learning w/ multi-action distribution). py_test( - name = "examples/connectors/connector_v2_nested_action_spaces_ppo", - main = "examples/connectors/connector_v2_nested_action_spaces.py", + name = "examples/connectors/nested_action_spaces_ppo", + main = "examples/connectors/nested_action_spaces.py", tags = ["team:rllib", "exclusive", "examples"], size = "large", - srcs = ["examples/connectors/connector_v2_nested_action_spaces.py"], + srcs = ["examples/connectors/nested_action_spaces.py"], args = ["--enable-new-api-stack", "--as-test", "--framework=torch", "--stop-reward=-500.0", "--algo=PPO"] ) py_test( - name = "examples/connectors/connector_v2_nested_action_spaces_multi_agent_ppo", - main = "examples/connectors/connector_v2_nested_action_spaces.py", + name = "examples/connectors/nested_action_spaces_multi_agent_ppo", + main = "examples/connectors/nested_action_spaces.py", tags = ["team:rllib", "exclusive", "examples"], size = "large", - srcs = ["examples/connectors/connector_v2_nested_action_spaces.py"], + srcs = ["examples/connectors/nested_action_spaces.py"], args = ["--enable-new-api-stack", "--as-test", "--num-agents=2", "--framework=torch", "--stop-reward=-1000.0", "--algo=PPO"] ) # Nested observation spaces (flattening). py_test( - name = "examples/connectors/connector_v2_nested_observation_spaces_ppo", - main = "examples/connectors/connector_v2_nested_observation_spaces.py", + name = "examples/connectors/nested_observation_spaces_ppo", + main = "examples/connectors/nested_observation_spaces.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", - srcs = ["examples/connectors/connector_v2_nested_observation_spaces.py"], + srcs = ["examples/connectors/nested_observation_spaces.py"], args = ["--enable-new-api-stack", "--as-test", "--stop-reward=400.0", "--framework=torch", "--algo=PPO"] ) py_test( - name = "examples/connectors/connector_v2_nested_observation_spaces_multi_agent_ppo", - main = "examples/connectors/connector_v2_nested_observation_spaces.py", + name = "examples/connectors/nested_observation_spaces_multi_agent_ppo", + main = "examples/connectors/nested_observation_spaces.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", - srcs = ["examples/connectors/connector_v2_nested_observation_spaces.py"], + srcs = ["examples/connectors/nested_observation_spaces.py"], args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=800.0", "--framework=torch", "--algo=PPO"] ) # Prev-r/prev actions + LSTM example. py_test( - name = "examples/connectors/connector_v2_prev_actions_prev_rewards_ppo", - main = "examples/connectors/connector_v2_prev_actions_prev_rewards.py", + name = "examples/connectors/prev_actions_prev_rewards_ppo", + main = "examples/connectors/prev_actions_prev_rewards.py", tags = ["team:rllib", "exclusive", "examples"], size = "large", - srcs = ["examples/connectors/connector_v2_prev_actions_prev_rewards.py"], + srcs = ["examples/connectors/prev_actions_prev_rewards.py"], args = ["--enable-new-api-stack", "--as-test", "--stop-reward=200.0", "--framework=torch", "--algo=PPO", "--num-env-runners=4", "--num-cpus=6"] ) py_test( - name = "examples/connectors/connector_v2_prev_actions_prev_rewards_multi_agent_ppo", - main = "examples/connectors/connector_v2_prev_actions_prev_rewards.py", + name = "examples/connectors/prev_actions_prev_rewards_multi_agent_ppo", + main = "examples/connectors/prev_actions_prev_rewards.py", tags = ["team:rllib", "exclusive", "examples"], size = "large", - srcs = ["examples/connectors/connector_v2_prev_actions_prev_rewards.py"], + srcs = ["examples/connectors/prev_actions_prev_rewards.py"], args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=400.0", "--framework=torch", "--algo=PPO", "--num-env-runners=4", "--num-cpus=6"] ) # MeanStd filtering example. py_test( - name = "examples/connectors/connector_v2_mean_std_filtering_ppo", - main = "examples/connectors/connector_v2_mean_std_filtering.py", + name = "examples/connectors/mean_std_filtering_ppo", + main = "examples/connectors/mean_std_filtering.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", - srcs = ["examples/connectors/connector_v2_mean_std_filtering.py"], + srcs = ["examples/connectors/mean_std_filtering.py"], args = ["--enable-new-api-stack", "--as-test", "--stop-reward=-300.0", "--framework=torch", "--algo=PPO", "--num-env-runners=2"] ) diff --git a/rllib/examples/connectors/connector_v2_frame_stacking.py b/rllib/examples/connectors/frame_stacking.py similarity index 100% rename from rllib/examples/connectors/connector_v2_frame_stacking.py rename to rllib/examples/connectors/frame_stacking.py diff --git a/rllib/examples/connectors/connector_v2_mean_std_filtering.py b/rllib/examples/connectors/mean_std_filtering.py similarity index 91% rename from rllib/examples/connectors/connector_v2_mean_std_filtering.py rename to rllib/examples/connectors/mean_std_filtering.py index f579e60756a2..dfd8b618d4b9 100644 --- a/rllib/examples/connectors/connector_v2_mean_std_filtering.py +++ b/rllib/examples/connectors/mean_std_filtering.py @@ -54,14 +54,14 @@ vf_clip_param=10.0, vf_loss_coeff=0.01, ) - # .evaluation( - # evaluation_num_workers=1, - # evaluation_parallel_to_training=True, - # evaluation_interval=1, - # evaluation_duration=10, - # evaluation_duration_unit="episodes", - # evaluation_config={"explore": False}, - # ) + .evaluation( + evaluation_num_workers=1, + evaluation_parallel_to_training=True, + evaluation_interval=1, + evaluation_duration=10, + evaluation_duration_unit="episodes", + evaluation_config={"explore": False}, + ) ) if args.enable_new_api_stack: config = config.rl_module( diff --git a/rllib/examples/connectors/connector_v2_nested_action_spaces.py b/rllib/examples/connectors/nested_action_spaces.py similarity index 100% rename from rllib/examples/connectors/connector_v2_nested_action_spaces.py rename to rllib/examples/connectors/nested_action_spaces.py diff --git a/rllib/examples/connectors/connector_v2_nested_observation_spaces.py b/rllib/examples/connectors/nested_observation_spaces.py similarity index 100% rename from rllib/examples/connectors/connector_v2_nested_observation_spaces.py rename to rllib/examples/connectors/nested_observation_spaces.py diff --git a/rllib/examples/connectors/connector_v2_prev_actions_prev_rewards.py b/rllib/examples/connectors/prev_actions_prev_rewards.py similarity index 100% rename from rllib/examples/connectors/connector_v2_prev_actions_prev_rewards.py rename to rllib/examples/connectors/prev_actions_prev_rewards.py diff --git a/rllib/examples/nested_action_spaces.py b/rllib/examples/nested_action_spaces.py index fe637ba135cb..1546b3b65874 100644 --- a/rllib/examples/nested_action_spaces.py +++ b/rllib/examples/nested_action_spaces.py @@ -1,6 +1,6 @@ msg = """ This script has been moved to -`ray.rllib.examples.connectors.connector_v2_nested_action_spaces.py` +`ray.rllib.examples.connectors.nested_action_spaces.py` """ raise NotImplementedError(msg) From c7521ae5c9554f045e8b4959ae8d7564b2db04f9 Mon Sep 17 00:00:00 2001 From: sven1977 Date: Thu, 18 Apr 2024 16:26:15 +0200 Subject: [PATCH 2/3] wip Signed-off-by: sven1977 --- rllib/BUILD | 6 +++--- rllib/examples/rl_modules/frame_stacking_rlm.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rllib/BUILD b/rllib/BUILD index da5e0fafeb8a..5df6c19d8899 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -2240,11 +2240,11 @@ py_test( ) py_test( - name = "examples/connectors/connector_v2_mean_std_filtering_multi_agent_ppo", - main = "examples/connectors/connector_v2_mean_std_filtering.py", + name = "examples/connectors/mean_std_filtering_multi_agent_ppo", + main = "examples/connectors/mean_std_filtering.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", - srcs = ["examples/connectors/connector_v2_mean_std_filtering.py"], + srcs = ["examples/connectors/mean_std_filtering.py"], args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=-600.0", "--framework=torch", "--algo=PPO", "--num-env-runners=5", "--num-cpus=6"] ) diff --git a/rllib/examples/rl_modules/frame_stacking_rlm.py b/rllib/examples/rl_modules/frame_stacking_rlm.py index acde07daeff4..4ed592fa8705 100644 --- a/rllib/examples/rl_modules/frame_stacking_rlm.py +++ b/rllib/examples/rl_modules/frame_stacking_rlm.py @@ -6,7 +6,7 @@ ConnectorV2). Take a look at this example script here, which shows how you can do frame- stacking with RLlib's new ConnectorV2 API. -`ray.rllib.examples.connectors.connector_v2_frame_stacking.py` +`ray.rllib.examples.connectors.frame_stacking.py` """ raise NotImplementedError(msg) From 71413865708f071b803f012ce6559047a37bb4d5 Mon Sep 17 00:00:00 2001 From: sven1977 Date: Thu, 18 Apr 2024 18:36:43 +0200 Subject: [PATCH 3/3] wip Signed-off-by: sven1977 --- rllib/examples/connectors/mean_std_filtering.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rllib/examples/connectors/mean_std_filtering.py b/rllib/examples/connectors/mean_std_filtering.py index dfd8b618d4b9..f579e60756a2 100644 --- a/rllib/examples/connectors/mean_std_filtering.py +++ b/rllib/examples/connectors/mean_std_filtering.py @@ -54,14 +54,14 @@ vf_clip_param=10.0, vf_loss_coeff=0.01, ) - .evaluation( - evaluation_num_workers=1, - evaluation_parallel_to_training=True, - evaluation_interval=1, - evaluation_duration=10, - evaluation_duration_unit="episodes", - evaluation_config={"explore": False}, - ) + # .evaluation( + # evaluation_num_workers=1, + # evaluation_parallel_to_training=True, + # evaluation_interval=1, + # evaluation_duration=10, + # evaluation_duration_unit="episodes", + # evaluation_config={"explore": False}, + # ) ) if args.enable_new_api_stack: config = config.rl_module(