diff --git a/rllib/BUILD b/rllib/BUILD
index 46642d737365..f65a255bef86 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -3330,6 +3330,14 @@ py_test(
     args = ["--stop-iters=4", "--framework=torch"]
 )
 
+py_test(
+    name = "examples/multi_agent_independent_learning",
+    main = "examples/multi_agent_independent_learning.py",
+    tags = ["team:rllib", "examples"],
+    size = "medium",
+    srcs = ["examples/multi_agent_independent_learning.py"],
+    args = ["--as-test", "--num-gpus=0"]
+)
 
 py_test(
     name = "examples/multi_agent_two_trainers_tf",
diff --git a/rllib/examples/multi_agent_independent_learning.py b/rllib/examples/multi_agent_independent_learning.py
index 416e48bacbf4..b88673b451a4 100644
--- a/rllib/examples/multi_agent_independent_learning.py
+++ b/rllib/examples/multi_agent_independent_learning.py
@@ -1,19 +1,32 @@
+import argparse
+
 from ray import air, tune
 from ray.tune.registry import register_env
 from ray.rllib.algorithms.apex_ddpg import ApexDDPGConfig
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v3
+from pettingzoo.sisl import waterworld_v4
 
-# TODO (Kourosh): Noticed that the env is broken and throws an error in this test.
-# The error is ValueError: Input vector should be 1-D. (Could be pettingzoo version
-# issue)
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--num-gpus",
+    type=int,
+    default=1,
+    help="Number of GPUs to use for training.",
+)
+parser.add_argument(
+    "--as-test",
+    action="store_true",
+    help="Whether this script should be run as a test: Only one training "
+    "iteration will be run.",
+)
+
 if __name__ == "__main__":
-    # RDQN - Rainbow DQN
-    # ADQN - Apex DQN
+    args = parser.parse_args()
+
     def env_creator(args):
-        return PettingZooEnv(waterworld_v3.env())
+        return PettingZooEnv(waterworld_v4.env())
 
     env = env_creator({})
     register_env("waterworld", env_creator)
@@ -21,7 +34,7 @@ def env_creator(args):
     config = (
         ApexDDPGConfig()
         .environment("waterworld")
-        .resources(num_gpus=1)
+        .resources(num_gpus=args.num_gpus)
         .rollouts(num_rollout_workers=2)
         .multi_agent(
             policies=env.get_agent_ids(),
@@ -29,10 +42,16 @@ def env_creator(args):
         )
     )
 
+    if args.as_test:
+        # Only a compilation test of running waterworld / independent learning.
+        stop = {"training_iteration": 1}
+    else:
+        stop = {"episodes_total": 60000}
+
     tune.Tuner(
         "APEX_DDPG",
         run_config=air.RunConfig(
-            stop={"episodes_total": 60000},
+            stop=stop,
             checkpoint_config=air.CheckpointConfig(
                 checkpoint_frequency=10,
             ),
diff --git a/rllib/examples/multi_agent_parameter_sharing.py b/rllib/examples/multi_agent_parameter_sharing.py
index e2833fcb5a75..e40aa25e60b1 100644
--- a/rllib/examples/multi_agent_parameter_sharing.py
+++ b/rllib/examples/multi_agent_parameter_sharing.py
@@ -1,8 +1,8 @@
 from ray import air, tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v3
+from pettingzoo.sisl import waterworld_v4
 
 # TODO (Kourosh): Noticed that the env is broken and throws an error in this test.
 # The error is ValueError: Input vector should be 1-D. (Could be pettingzoo version
 # issue)
@@ -12,7 +12,7 @@
     # RDQN - Rainbow DQN
     # ADQN - Apex DQN
 
-    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))
+    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v4.env()))
 
     tune.Tuner(
         "APEX_DDPG",
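
Note for reviewers: with the argparse flags added above, the example can be smoke-tested locally the same way the new BUILD rule invokes it (a sketch, assuming a CPU-only machine; the script itself is the source of truth):

    python rllib/examples/multi_agent_independent_learning.py --as-test --num-gpus=0

Under --as-test, Tune's stop criterion {"training_iteration": 1} ends the run after a single training iteration; without the flag, the original {"episodes_total": 60000} criterion is kept.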