From a003833ed64df450e4b763f1cc19c888e20f1aa0 Mon Sep 17 00:00:00 2001
From: Sven Mika
Date: Mon, 3 Jan 2022 12:15:53 +0100
Subject: [PATCH 1/4] Revert "Revert "[RLlib] Updated pettingzoo wrappers, env
 versions, urls (#20113)" (#21338)"

This reverts commit 489e6945a671b7af2cf1f047b2ad2c879286ec73.
---
 .buildkite/pipeline.gpu.large.yml             |  4 +-
 doc/source/rllib-env.rst                      |  6 +-
 python/requirements/ml/requirements_rllib.txt |  4 +-
 rllib/env/tests/test_remote_worker_envs.py    |  4 +-
 rllib/env/wrappers/pettingzoo_env.py          | 64 +++++++++----------
 .../multi_agent_independent_learning.py       |  4 +-
 .../examples/multi_agent_parameter_sharing.py |  4 +-
 rllib/tests/test_pettingzoo_env.py            |  6 +-
 8 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/.buildkite/pipeline.gpu.large.yml b/.buildkite/pipeline.gpu.large.yml
index 034480c71bdb..7aa3a3b098ff 100644
--- a/.buildkite/pipeline.gpu.large.yml
+++ b/.buildkite/pipeline.gpu.large.yml
@@ -22,7 +22,9 @@
   conditions: ["RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 ./ci/travis/install-dependencies.sh
+    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
+    # Because Python version changed, we need to re-install Ray here
+    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/travis/ci.sh build
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/travis/env_info.sh
     # --jobs 2 is necessary as we only need to have at least 2 gpus on the machine
diff --git a/doc/source/rllib-env.rst b/doc/source/rllib-env.rst
index bef5ad40ab06..975868d58090 100644
--- a/doc/source/rllib-env.rst
+++ b/doc/source/rllib-env.rst
@@ -213,17 +213,17 @@ To scale to hundreds of agents, MultiAgentEnv batches policy evaluations across
 PettingZoo Multi-Agent Environments
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-`PettingZoo <https://github.com/PettingZoo-Team/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
+`PettingZoo <https://github.com/Farama-Foundation/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
 
 .. code-block:: python
 
     from ray.tune.registry import register_env
     # import the pettingzoo environment
-    from pettingzoo.butterfly import prison_v2
+    from pettingzoo.butterfly import prison_v3
     # import rllib pettingzoo interface
     from ray.rllib.env import PettingZooEnv
     # define how to make the environment. This way takes an optional environment config, num_floors
-    env_creator = lambda config: prison_v2.env(num_floors=config.get("num_floors", 4))
+    env_creator = lambda config: prison_v3.env(num_floors=config.get("num_floors", 4))
     # register that way to make the environment under an rllib name
     register_env('prison', lambda config: PettingZooEnv(env_creator(config)))
     # now you can use `prison` as an environment
diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index a9b500e40165..f3585627317b 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -11,9 +11,9 @@ kaggle_environments==1.7.11
 # Unity3D testing
 mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
-pettingzoo==1.11.1
+pettingzoo==1.14.0
 pymunk==6.0.0
-supersuit==2.6.6
+supersuit==3.3.2
 # For testing in MuJoCo-like envs (in PyBullet).
 pybullet==3.2.0
 # For tests on RecSim and Kaggle envs.
diff --git a/rllib/env/tests/test_remote_worker_envs.py b/rllib/env/tests/test_remote_worker_envs.py
index fac76c8f26d7..7c0627966cff 100644
--- a/rllib/env/tests/test_remote_worker_envs.py
+++ b/rllib/env/tests/test_remote_worker_envs.py
@@ -1,6 +1,6 @@
 import gym
 import numpy as np
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
 from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
 import unittest
 
@@ -15,7 +15,7 @@
 # Function that outputs the environment you wish to register.
 def env_creator(config):
-    env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+    env = pistonball_v5.env()
     env = dtype_v0(env, dtype=np.float32)
     env = color_reduction_v0(env, mode="R")
     env = normalize_obs_v0(env)
diff --git a/rllib/env/wrappers/pettingzoo_env.py b/rllib/env/wrappers/pettingzoo_env.py
index 9c45b6224cdc..028f90073a31 100644
--- a/rllib/env/wrappers/pettingzoo_env.py
+++ b/rllib/env/wrappers/pettingzoo_env.py
@@ -4,7 +4,7 @@
 class PettingZooEnv(MultiAgentEnv):
     """An interface to the PettingZoo MARL environment library.
 
-    See: https://github.com/PettingZoo-Team/PettingZoo
+    See: https://github.com/Farama-Foundation/PettingZoo
 
     Inherits from MultiAgentEnv and exposes a given AEC
     (actor-environment-cycle) game from the PettingZoo project via the
@@ -15,7 +15,7 @@
     1. All agents have the same action_spaces and observation_spaces.
        Note: If, within your aec game, agents do not have homogeneous
        action / observation spaces, apply SuperSuit wrappers
-       to apply padding functionality: https://github.com/PettingZoo-Team/
+       to apply padding functionality: https://github.com/Farama-Foundation/
        SuperSuit#built-in-multi-agent-only-functions
     2. Environments are positive sum games (-> Agents are expected to cooperate
        to maximize reward). This isn't a hard restriction, it just that
       standard algorithms aren't expected to work well in highly competitive
@@ -23,8 +23,8 @@
        games.
 
     Examples:
-        >>> from pettingzoo.butterfly import prison_v2
-        >>> env = PettingZooEnv(prison_v2.env())
+        >>> from pettingzoo.butterfly import prison_v3
+        >>> env = PettingZooEnv(prison_v3.env())
         >>> obs = env.reset()
         >>> print(obs)
         # only returns the observation for the agent which should be stepping
@@ -68,34 +68,26 @@ class PettingZooEnv(MultiAgentEnv):
     def __init__(self, env):
         self.env = env
-        # agent idx list
-        self.agents = self.env.possible_agents
-
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.env.observation_spaces
-        self.action_spaces = self.env.action_spaces
+        env.reset()
 
         # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.env.observation_space(self.env.agents[0])
 
         # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.env.action_space(self.env.agents[0])
 
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.env.observation_spaces.values()), \
+        assert all(self.env.observation_space(agent) == self.observation_space
+                   for agent in self.env.agents), \
             "Observation spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_observations wrapper can help (useage: " \
            "`supersuit.aec_wrappers.pad_observations(env)`"
 
-        assert all(act_space == self.action_space
-                   for act_space in self.env.action_spaces.values()), \
+        assert all(self.env.action_space(agent) == self.action_space
+                   for agent in self.env.agents), \
             "Action spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_action_space wrapper can help (useage: " \
+            "SuperSuit's pad_action_space wrapper can help (usage: " \
             "`supersuit.aec_wrappers.pad_action_space(env)`"
 
-        self.reset()
-
     def reset(self):
         self.env.reset()
         return {
@@ -135,38 +127,36 @@ def seed(self, seed=None):
     def render(self, mode="human"):
         return self.env.render(mode)
 
+    @property
+    def get_sub_environments(self):
+        return self.env.unwrapped
+
 
 class ParallelPettingZooEnv(MultiAgentEnv):
     def __init__(self, env):
         self.par_env = env
-        # agent idx list
-        self.agents = self.par_env.possible_agents
-
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.par_env.observation_spaces
-        self.action_spaces = self.par_env.action_spaces
+        self.par_env.reset()
 
         # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.par_env.observation_space(
+            self.par_env.agents[0])
 
         # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.par_env.action_space(self.par_env.agents[0])
 
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.par_env.observation_spaces.values()), \
+        assert all(
+            self.par_env.observation_space(agent) == self.observation_space
+            for agent in self.par_env.agents), \
             "Observation spaces for all agents must be identical. Perhaps " \
             "SuperSuit's pad_observations wrapper can help (useage: " \
             "`supersuit.aec_wrappers.pad_observations(env)`"
 
-        assert all(act_space == self.action_space
-                   for act_space in self.par_env.action_spaces.values()), \
+        assert all(self.par_env.action_space(agent) == self.action_space
+                   for agent in self.par_env.agents), \
             "Action spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_action_space wrapper can help (useage: " \
            "`supersuit.aec_wrappers.pad_action_space(env)`"
 
-        self.reset()
-
     def reset(self):
         return self.par_env.reset()
 
@@ -183,3 +173,7 @@ def seed(self, seed=None):
 
     def render(self, mode="human"):
         return self.par_env.render(mode)
+
+    @property
+    def unwrapped(self):
+        return self.par_env.unwrapped
diff --git a/rllib/examples/multi_agent_independent_learning.py b/rllib/examples/multi_agent_independent_learning.py
index d425cc0654e6..b60237f278e8 100644
--- a/rllib/examples/multi_agent_independent_learning.py
+++ b/rllib/examples/multi_agent_independent_learning.py
@@ -1,7 +1,7 @@
 from ray import tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v2
+from pettingzoo.sisl import waterworld_v3
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
@@ -9,7 +9,7 @@
 # RDQN - Rainbow DQN
 # ADQN - Apex DQN
 def env_creator(args):
-    return PettingZooEnv(waterworld_v2.env())
+    return PettingZooEnv(waterworld_v3.env())
 
 env = env_creator({})
 register_env("waterworld", env_creator)
diff --git a/rllib/examples/multi_agent_parameter_sharing.py b/rllib/examples/multi_agent_parameter_sharing.py
index 5dae069237fd..a4ca7aa2f17b 100644
--- a/rllib/examples/multi_agent_parameter_sharing.py
+++ b/rllib/examples/multi_agent_parameter_sharing.py
@@ -1,7 +1,7 @@
 from ray import tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v0
+from pettingzoo.sisl import waterworld_v3
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
@@ -9,7 +9,7 @@
 # RDQN - Rainbow DQN
 # ADQN - Apex DQN
 
-    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v0.env()))
+    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))
 
     tune.run(
         "APEX_DDPG",
diff --git a/rllib/tests/test_pettingzoo_env.py b/rllib/tests/test_pettingzoo_env.py
index f2179e8173d4..140d3104e852 100644
--- a/rllib/tests/test_pettingzoo_env.py
+++ b/rllib/tests/test_pettingzoo_env.py
@@ -7,7 +7,7 @@
 from ray.rllib.env import PettingZooEnv
 from ray.rllib.agents.registry import get_trainer_class
 
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
 from pettingzoo.mpe import simple_spread_v2
 from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
 
@@ -19,9 +19,9 @@ def setUp(self) -> None:
     def tearDown(self) -> None:
         ray.shutdown()
 
-    def test_pettingzoo_pistonball_v4_policies_are_dict_env(self):
+    def test_pettingzoo_pistonball_v5_policies_are_dict_env(self):
         def env_creator(config):
-            env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+            env = pistonball_v5.env()
             env = dtype_v0(env, dtype=float32)
             env = color_reduction_v0(env, mode="R")
             env = normalize_obs_v0(env)

From 497e44cdba6b213dc71a871001f8620cb56c1bd4 Mon Sep 17 00:00:00 2001
From: sven1977
Date: Mon, 3 Jan 2022 12:51:51 +0100
Subject: [PATCH 2/4] wip.

---
 python/requirements/ml/requirements_rllib.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index f3585627317b..c84158ffc67f 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -11,7 +11,8 @@ kaggle_environments==1.7.11
 # Unity3D testing
 mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
-pettingzoo==1.14.0
+pettingzoo==1.14.0; python_version >= '3.7'
+pettingzoo==1.12.0; python_version < '3.7'
 pymunk==6.0.0
 supersuit==3.3.2
 # For testing in MuJoCo-like envs (in PyBullet).

From 701763cbbad7e76efe1d06177e67b1f321c2a97a Mon Sep 17 00:00:00 2001
From: sven1977
Date: Tue, 4 Jan 2022 09:01:08 +0100
Subject: [PATCH 3/4] wip.

---
 python/requirements/ml/requirements_rllib.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index c84158ffc67f..e0faff262019 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -12,7 +12,6 @@ kaggle_environments==1.7.11
 mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
 pettingzoo==1.14.0; python_version >= '3.7'
-pettingzoo==1.12.0; python_version < '3.7'
 pymunk==6.0.0
 supersuit==3.3.2
 # For testing in MuJoCo-like envs (in PyBullet).

From 9c3ca4e8f98c7fa940104192133b0d824c4b9f6f Mon Sep 17 00:00:00 2001
From: sven1977
Date: Tue, 4 Jan 2022 11:38:03 +0100
Subject: [PATCH 4/4] fix.

---
 python/requirements/ml/requirements_rllib.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index e0faff262019..60020710848e 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -13,7 +13,7 @@ mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
 pettingzoo==1.14.0; python_version >= '3.7'
 pymunk==6.0.0
-supersuit==3.3.2
+supersuit==3.3.2; python_version >= '3.7'
 # For testing in MuJoCo-like envs (in PyBullet).
 pybullet==3.2.0
 # For tests on RecSim and Kaggle envs.
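
For reference, a minimal usage sketch of the wrapper as updated by this series. It assumes the pins above (pettingzoo==1.14.0, supersuit==3.3.2) and an RLlib of the same vintage; the waterworld registration mirrors rllib/examples/multi_agent_independent_learning.py, while the trainer name and config values below are illustrative placeholders, not settings taken from the patch.

    from ray import tune
    from ray.tune.registry import register_env
    from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
    from pettingzoo.sisl import waterworld_v3

    # The updated wrapper resets the env itself in __init__ and reads the
    # per-agent spaces via env.observation_space(agent) /
    # env.action_space(agent), so the raw AEC env is handed over directly.
    register_env("waterworld",
                 lambda config: PettingZooEnv(waterworld_v3.env()))

    tune.run(
        "PPO",  # illustrative; the patched examples use APEX_DDPG as well
        stop={"episodes_total": 100},
        config={
            "env": "waterworld",
            "framework": "torch",
            "num_workers": 1,
        },
    )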