Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RLlib] New API stack: Add systematic IMPALA learning tests for [CartPole|Pendulum] | [CPU|GPU|multi-CPU|multi-GPU] | [single- and multi-agent]. #46162

Merged
merged 11 commits into from
Jun 22, 2024
60 changes: 57 additions & 3 deletions rllib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,62 @@ py_test(
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack"]
)
# New API stack: single-agent CartPole IMPALA on 1 GPU learner.
# The "gpu" tag routes this test to a GPU-equipped CI machine.
py_test(
name = "learning_tests_cartpole_impala_gpu",
main = "tuned_examples/impala/cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
size = "large",
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
# New API stack: single-agent CartPole IMPALA with 2 learner workers.
# NOTE(review): passes --num-gpus=2 but carries no "gpu"/"multi_gpu" tag, so it
# runs on CPU-only CI machines — presumably the flag here just sets the number
# of learner workers (falling back to CPU); confirm against the script's arg
# parser before relying on this.
py_test(
name = "learning_tests_cartpole_impala_multi_cpu",
main = "tuned_examples/impala/cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# New API stack: single-agent CartPole IMPALA on 2 GPU learners.
# The "multi_gpu" tag routes this test to a multi-GPU CI machine.
py_test(
name = "learning_tests_cartpole_impala_multi_gpu",
main = "tuned_examples/impala/cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# New API stack: multi-agent (2 agents) CartPole IMPALA, CPU-only.
py_test(
    name = "learning_tests_multi_agent_cartpole_impala",
    main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "torch_only"],
    size = "large",
    srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
    # Fixed: the trailing "-stop-reward=" (single dash, empty value) would make
    # the script's argparse error out at startup; dropped so the script's own
    # default stop criteria apply, matching the sibling multi-agent tests below.
    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2"]
)
# New API stack: multi-agent (2 agents) CartPole IMPALA on 1 GPU learner.
# The "gpu" tag routes this test to a GPU-equipped CI machine.
py_test(
name = "learning_tests_multi_agent_cartpole_impala_gpu",
main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
size = "large",
srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1"]
)
# New API stack: multi-agent (2 agents) CartPole IMPALA with 2 learner workers.
# NOTE(review): --num-gpus=2 without a "gpu"/"multi_gpu" tag — presumably runs
# the 2 learners on CPU-only CI machines; confirm the flag's semantics.
py_test(
name = "learning_tests_multi_agent_cartpole_impala_multi_cpu",
main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2"]
)
# New API stack: multi-agent (2 agents) CartPole IMPALA on 2 GPU learners.
# The "multi_gpu" tag routes this test to a multi-GPU CI machine.
py_test(
name = "learning_tests_multi_agent_cartpole_impala_multi_gpu",
main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2"]
)

#@OldAPIstack
py_test(
Expand All @@ -346,18 +402,16 @@ py_test(
],
args = ["--dir=tuned_examples/impala"]
)

#@OldAPIStack
# Old API stack: regression test driven through run_regression_tests.py; the
# actual tuned example now lives in the *_old_api_stack.py file (the original
# multi_agent_cartpole_impala.py was converted to the new API stack).
py_test(
name = "learning_tests_multi_agent_cartpole_impala_old_api_stack",
main = "tests/run_regression_tests.py",
tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"],
size = "medium",
srcs = ["tests/run_regression_tests.py"],
data = ["tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py"],
args = ["--dir=tuned_examples/impala"]
)

#@OldAPIStack
py_test(
name = "learning_tests_cartpole_impala_fake_gpus_old_api_stack",
Expand Down
57 changes: 36 additions & 21 deletions rllib/tuned_examples/impala/multi_agent_cartpole_impala.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,58 @@
# @OldAPIStack
from ray.rllib.algorithms.impala import ImpalaConfig
from ray.rllib.connectors.env_to_module.mean_std_filter import MeanStdFilter
from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
from ray.rllib.utils.metrics import (
ENV_RUNNER_RESULTS,
EPISODE_RETURN_MEAN,
NUM_ENV_STEPS_SAMPLED_LIFETIME,
)
from ray import tune
from ray.rllib.utils.test_utils import add_rllib_example_script_args
from ray.tune.registry import register_env

tune.registry.register_env("env", lambda cfg: MultiAgentCartPole(config=cfg))
parser = add_rllib_example_script_args()
# Use `parser` to add your own custom command line options to this script
# and (if needed) use their values to set up `config` below.
args = parser.parse_args()

register_env("env", lambda cfg: MultiAgentCartPole(config=cfg))


config = (
ImpalaConfig()
.environment("env", env_config={"num_agents": 4})
.env_runners(
num_envs_per_env_runner=5,
num_env_runners=4,
observation_filter="MeanStdFilter",
.api_stack(
enable_rl_module_and_learner=True,
enable_env_runner_and_connector_v2=True,
)
.resources(num_gpus=1, _fake_gpus=True)
.multi_agent(
policies=["p0", "p1", "p2", "p3"],
policy_mapping_fn=(lambda agent_id, episode, worker, **kwargs: f"p{agent_id}"),
.environment("env", env_config={"num_agents": 2})
.env_runners(
env_to_module_connector=lambda env: MeanStdFilter(multi_agent=True),
)
.training(
num_sgd_iter=1,
vf_loss_coeff=0.005,
vtrace=True,
model={
"fcnet_hiddens": [32],
"fcnet_activation": "linear",
train_batch_size_per_learner=500,
grad_clip=40.0,
grad_clip_by="global_norm",
lr=0.0005,
vf_loss_coeff=0.1,
)
.rl_module(
model_config_dict={
"vf_share_layers": True,
"uses_new_env_runners": True,
},
replay_proportion=0.0,
)
.multi_agent(
policies=["p0", "p1"],
policy_mapping_fn=(lambda agent_id, episode, **kwargs: f"p{agent_id}"),
)
)

stop = {
f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 600, # 600 / 4 (==num_agents) = 150
f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 200000,
f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 800.0,
NUM_ENV_STEPS_SAMPLED_LIFETIME: 400000,
}


if __name__ == "__main__":
from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

run_rllib_example_script_experiment(config, args, stop=stop)

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# @OldAPIStack
from ray.rllib.algorithms.impala import ImpalaConfig
from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
from ray.rllib.utils.metrics import (
    ENV_RUNNER_RESULTS,
    EPISODE_RETURN_MEAN,
    NUM_ENV_STEPS_SAMPLED_LIFETIME,
)
from ray import tune


def _env_creator(cfg):
    """Env factory: a MultiAgentCartPole parameterized by the env_config."""
    return MultiAgentCartPole(config=cfg)


def _policy_mapping_fn(agent_id, episode, worker, **kwargs):
    """Map agent id `i` to its own dedicated policy `p{i}`."""
    return f"p{agent_id}"


tune.registry.register_env("env", _env_creator)

# Old-API-stack IMPALA config: 4-agent CartPole, one policy per agent,
# 4 env-runner workers with 5 vectorized envs each, trained on 1 (fake) GPU.
config = ImpalaConfig()
config = config.environment("env", env_config={"num_agents": 4})
config = config.env_runners(
    num_envs_per_env_runner=5,
    num_env_runners=4,
    observation_filter="MeanStdFilter",
)
config = config.resources(num_gpus=1, _fake_gpus=True)
config = config.multi_agent(
    policies=["p0", "p1", "p2", "p3"],
    policy_mapping_fn=_policy_mapping_fn,
)
config = config.training(
    num_sgd_iter=1,
    vf_loss_coeff=0.005,
    vtrace=True,
    model={
        "fcnet_hiddens": [32],
        "fcnet_activation": "linear",
        "vf_share_layers": True,
    },
    replay_proportion=0.0,
)

# Stop criteria: mean return summed over all agents, or a sampling budget.
stop = {
    f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 600,  # 600 / 4 (==num_agents) = 150
    f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 200000,
}
Loading