
[RLlib] Change Waterworld v3 to v4 #31820

Merged
merged 2 commits on Jan 31, 2023
8 changes: 8 additions & 0 deletions rllib/BUILD
@@ -3330,6 +3330,14 @@ py_test(
    args = ["--stop-iters=4", "--framework=torch"]
)

py_test(
name = "examples/multi_agent_independent_learning",
main = "examples/multi_agent_independent_learning.py",
tags = ["team:rllib", "examples"],
size = "medium",
srcs = ["examples/multi_agent_independent_learning.py"],
args = ["--stop-iters=4", "--framework=torch"]
)

py_test(
name = "examples/multi_agent_two_trainers_tf",
38 changes: 29 additions & 9 deletions rllib/examples/multi_agent_independent_learning.py
@@ -1,38 +1,58 @@
import argparse

from ray import air, tune
from ray.tune.registry import register_env
from ray.rllib.algorithms.apex_ddpg import ApexDDPGConfig
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
from pettingzoo.sisl import waterworld_v3
from pettingzoo.sisl import waterworld_v4

# TODO (Kourosh): Noticed that the env is broken and throws an error in this test.
# The error is ValueError: Input vector should be 1-D. (Could be pettingzoo version
# issue)
# Based on code from github.com/parametersharingmadrl/parametersharingmadrl

parser = argparse.ArgumentParser()
parser.add_argument(
    "--num-gpus",
    type=int,
    default=1,
    help="Number of GPUs to use for training.",
)
parser.add_argument(
    "--as-test",
    action="store_true",
    help="Whether this script should be run as a test: Only one episode will be "
    "sampled.",
)

if __name__ == "__main__":
    # RDQN - Rainbow DQN
    # ADQN - Apex DQN
    args = parser.parse_args()

    def env_creator(args):
        return PettingZooEnv(waterworld_v3.env())
        return PettingZooEnv(waterworld_v4.env())

    env = env_creator({})
    register_env("waterworld", env_creator)

    config = (
        ApexDDPGConfig()
        .environment("waterworld")
        .resources(num_gpus=1)
        .resources(num_gpus=args.num_gpus)
        .rollouts(num_rollout_workers=2)
        .multi_agent(
            policies=env.get_agent_ids(),
            policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
        )
    )

    if args.as_test:
        # Only a compilation test of running waterworld / independent learning.
        stop = {"training_iteration": 1}
    else:
        stop = {"episodes_total": 60000}

    tune.Tuner(
        "APEX_DDPG",
        run_config=air.RunConfig(
            stop={"episodes_total": 60000},
            stop=stop,
            checkpoint_config=air.CheckpointConfig(
                checkpoint_frequency=10,
            ),
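For reference, below is a minimal, self-contained sketch of the independent-learning pattern the updated example relies on: each Waterworld v4 agent ID is mapped to its own policy. This is an illustration assuming Ray 2.x with pettingzoo[sisl] installed, not the exact contents of the (truncated) file above.

# Hedged sketch (not the exact example file): independent learning on Waterworld v4.
from ray import air, tune
from ray.rllib.algorithms.apex_ddpg import ApexDDPGConfig
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
from ray.tune.registry import register_env
from pettingzoo.sisl import waterworld_v4

# Register the PettingZoo env under a name RLlib can look up.
register_env("waterworld", lambda _: PettingZooEnv(waterworld_v4.env()))
env = PettingZooEnv(waterworld_v4.env())

config = (
    ApexDDPGConfig()
    .environment("waterworld")
    .rollouts(num_rollout_workers=2)
    .multi_agent(
        # One policy per agent ID plus an identity mapping -> independent learners.
        policies=env.get_agent_ids(),
        policy_mapping_fn=lambda agent_id, *args, **kwargs: agent_id,
    )
)

tune.Tuner(
    "APEX_DDPG",
    run_config=air.RunConfig(stop={"training_iteration": 1}),
    param_space=config.to_dict(),
).fit()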
4 changes: 2 additions & 2 deletions rllib/examples/multi_agent_parameter_sharing.py
@@ -1,7 +1,7 @@
from ray import air, tune
from ray.tune.registry import register_env
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
from pettingzoo.sisl import waterworld_v3
from pettingzoo.sisl import waterworld_v4

# TODO (Kourosh): Noticed that the env is broken and throws an error in this test.
# The error is ValueError: Input vector should be 1-D. (Could be pettingzoo version
@@ -12,7 +12,7 @@
# RDQN - Rainbow DQN
# ADQN - Apex DQN

register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))
register_env("waterworld", lambda _: PettingZooEnv(waterworld_v4.env()))

tune.Tuner(
"APEX_DDPG",
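The parameter-sharing counterpart differs only in its policy setup: every agent maps onto one shared policy, so all agents train the same weights. A hedged sketch of that pattern follows; the policy name "shared_policy" is illustrative, since the diff above only shows the import change.

# Hedged sketch of the parameter-sharing pattern: one policy shared by all agents.
from ray import air, tune
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
from ray.tune.registry import register_env
from pettingzoo.sisl import waterworld_v4

register_env("waterworld", lambda _: PettingZooEnv(waterworld_v4.env()))

tune.Tuner(
    "APEX_DDPG",
    run_config=air.RunConfig(stop={"training_iteration": 1}),
    param_space={
        "env": "waterworld",
        "framework": "torch",
        "multiagent": {
            # A single policy entry; the mapping fn sends every agent ID to it,
            # so all agents share (and update) the same set of weights.
            "policies": {"shared_policy"},
            "policy_mapping_fn": lambda agent_id, *args, **kwargs: "shared_policy",
        },
    },
).fit()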