From a003833ed64df450e4b763f1cc19c888e20f1aa0 Mon Sep 17 00:00:00 2001
From: Sven Mika
Date: Mon, 3 Jan 2022 12:15:53 +0100
Subject: [PATCH 1/4] Revert "Revert "[RLlib] Updated pettingzoo wrappers, env
 versions, urls (#20113)" (#21338)"

This reverts commit 489e6945a671b7af2cf1f047b2ad2c879286ec73.
---
 .buildkite/pipeline.gpu.large.yml             |  4 +-
 doc/source/rllib-env.rst                      |  6 +-
 python/requirements/ml/requirements_rllib.txt |  4 +-
 rllib/env/tests/test_remote_worker_envs.py    |  4 +-
 rllib/env/wrappers/pettingzoo_env.py          | 64 +++++++++----------
 .../multi_agent_independent_learning.py       |  4 +-
 .../examples/multi_agent_parameter_sharing.py |  4 +-
 rllib/tests/test_pettingzoo_env.py            |  6 +-
 8 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/.buildkite/pipeline.gpu.large.yml b/.buildkite/pipeline.gpu.large.yml
index 034480c71bdb..7aa3a3b098ff 100644
--- a/.buildkite/pipeline.gpu.large.yml
+++ b/.buildkite/pipeline.gpu.large.yml
@@ -22,7 +22,9 @@
   conditions: ["RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 ./ci/travis/install-dependencies.sh
+    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
+    # Because Python version changed, we need to re-install Ray here
+    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/travis/ci.sh build
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/travis/env_info.sh
     # --jobs 2 is necessary as we only need to have at least 2 gpus on the machine
diff --git a/doc/source/rllib-env.rst b/doc/source/rllib-env.rst
index bef5ad40ab06..975868d58090 100644
--- a/doc/source/rllib-env.rst
+++ b/doc/source/rllib-env.rst
@@ -213,17 +213,17 @@ To scale to hundreds of agents, MultiAgentEnv batches policy evaluations across
 PettingZoo Multi-Agent Environments
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-`PettingZoo <https://github.com/PettingZoo-Team/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
+`PettingZoo <https://github.com/Farama-Foundation/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
 
 .. code-block:: python
 
     from ray.tune.registry import register_env
     # import the pettingzoo environment
-    from pettingzoo.butterfly import prison_v2
+    from pettingzoo.butterfly import prison_v3
     # import rllib pettingzoo interface
     from ray.rllib.env import PettingZooEnv
     # define how to make the environment. This way takes an optional environment config, num_floors
-    env_creator = lambda config: prison_v2.env(num_floors=config.get("num_floors", 4))
+    env_creator = lambda config: prison_v3.env(num_floors=config.get("num_floors", 4))
     # register that way to make the environment under an rllib name
     register_env('prison', lambda config: PettingZooEnv(env_creator(config)))
     # now you can use `prison` as an environment
diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index a9b500e40165..f3585627317b 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -11,9 +11,9 @@ kaggle_environments==1.7.11
 # Unity3D testing
 mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
-pettingzoo==1.11.1
+pettingzoo==1.14.0
 pymunk==6.0.0
-supersuit==2.6.6
+supersuit==3.3.2
 # For testing in MuJoCo-like envs (in PyBullet).
 pybullet==3.2.0
 # For tests on RecSim and Kaggle envs.
diff --git a/rllib/env/tests/test_remote_worker_envs.py b/rllib/env/tests/test_remote_worker_envs.py
index fac76c8f26d7..7c0627966cff 100644
--- a/rllib/env/tests/test_remote_worker_envs.py
+++ b/rllib/env/tests/test_remote_worker_envs.py
@@ -1,6 +1,6 @@
 import gym
 import numpy as np
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
 from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
 import unittest
 
@@ -15,7 +15,7 @@
 # Function that outputs the environment you wish to register.
 def env_creator(config):
-    env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+    env = pistonball_v5.env()
     env = dtype_v0(env, dtype=np.float32)
     env = color_reduction_v0(env, mode="R")
     env = normalize_obs_v0(env)
diff --git a/rllib/env/wrappers/pettingzoo_env.py b/rllib/env/wrappers/pettingzoo_env.py
index 9c45b6224cdc..028f90073a31 100644
--- a/rllib/env/wrappers/pettingzoo_env.py
+++ b/rllib/env/wrappers/pettingzoo_env.py
@@ -4,7 +4,7 @@
 class PettingZooEnv(MultiAgentEnv):
     """An interface to the PettingZoo MARL environment library.
 
-    See: https://github.com/PettingZoo-Team/PettingZoo
+    See: https://github.com/Farama-Foundation/PettingZoo
 
     Inherits from MultiAgentEnv and exposes a given AEC
     (actor-environment-cycle) game from the PettingZoo project via the
@@ -15,7 +15,7 @@
     1. All agents have the same action_spaces and observation_spaces.
        Note: If, within your aec game, agents do not have homogeneous
        action / observation spaces, apply SuperSuit wrappers
-       to apply padding functionality: https://github.com/PettingZoo-Team/
+       to apply padding functionality: https://github.com/Farama-Foundation/
        SuperSuit#built-in-multi-agent-only-functions
     2. Environments are positive sum games (-> Agents are expected to cooperate
        to maximize reward). This isn't a hard restriction, it just that
       standard algorithms aren't expected to work well in highly competitive
@@ -23,8 +23,8 @@
        games.
 
     Examples:
-        >>> from pettingzoo.butterfly import prison_v2
-        >>> env = PettingZooEnv(prison_v2.env())
+        >>> from pettingzoo.butterfly import prison_v3
+        >>> env = PettingZooEnv(prison_v3.env())
         >>> obs = env.reset()
         >>> print(obs)
         # only returns the observation for the agent which should be stepping
@@ -68,34 +68,26 @@ class PettingZooEnv(MultiAgentEnv):
     def __init__(self, env):
         self.env = env
-        # agent idx list
-        self.agents = self.env.possible_agents
-
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.env.observation_spaces
-        self.action_spaces = self.env.action_spaces
+        env.reset()
 
         # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.env.observation_space(self.env.agents[0])
 
         # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.env.action_space(self.env.agents[0])
 
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.env.observation_spaces.values()), \
+        assert all(self.env.observation_space(agent) == self.observation_space
+                   for agent in self.env.agents), \
             "Observation spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_observations wrapper can help (useage: " \
            "`supersuit.aec_wrappers.pad_observations(env)`"
 
-        assert all(act_space == self.action_space
-                   for act_space in self.env.action_spaces.values()), \
+        assert all(self.env.action_space(agent) == self.action_space
+                   for agent in self.env.agents), \
             "Action spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_action_space wrapper can help (useage: " \
+            "SuperSuit's pad_action_space wrapper can help (usage: " \
             "`supersuit.aec_wrappers.pad_action_space(env)`"
 
-        self.reset()
-
     def reset(self):
         self.env.reset()
         return {
@@ -135,38 +127,36 @@ def seed(self, seed=None):
     def render(self, mode="human"):
         return self.env.render(mode)
 
+    @property
+    def get_sub_environments(self):
+        return self.env.unwrapped
+
 
 class ParallelPettingZooEnv(MultiAgentEnv):
     def __init__(self, env):
         self.par_env = env
-        # agent idx list
-        self.agents = self.par_env.possible_agents
-
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.par_env.observation_spaces
-        self.action_spaces = self.par_env.action_spaces
+        self.par_env.reset()
 
         # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.par_env.observation_space(
+            self.par_env.agents[0])
 
         # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.par_env.action_space(self.par_env.agents[0])
 
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.par_env.observation_spaces.values()), \
+        assert all(
+            self.par_env.observation_space(agent) == self.observation_space
+            for agent in self.par_env.agents), \
             "Observation spaces for all agents must be identical. Perhaps " \
             "SuperSuit's pad_observations wrapper can help (useage: " \
             "`supersuit.aec_wrappers.pad_observations(env)`"
 
-        assert all(act_space == self.action_space
-                   for act_space in self.par_env.action_spaces.values()), \
+        assert all(self.par_env.action_space(agent) == self.action_space
+                   for agent in self.par_env.agents), \
             "Action spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_action_space wrapper can help (useage: " \
            "`supersuit.aec_wrappers.pad_action_space(env)`"
 
-        self.reset()
-
     def reset(self):
         return self.par_env.reset()
 
@@ -183,3 +173,7 @@ def seed(self, seed=None):
 
     def render(self, mode="human"):
         return self.par_env.render(mode)
+
+    @property
+    def unwrapped(self):
+        return self.par_env.unwrapped
diff --git a/rllib/examples/multi_agent_independent_learning.py b/rllib/examples/multi_agent_independent_learning.py
index d425cc0654e6..b60237f278e8 100644
--- a/rllib/examples/multi_agent_independent_learning.py
+++ b/rllib/examples/multi_agent_independent_learning.py
@@ -1,7 +1,7 @@
 from ray import tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v2
+from pettingzoo.sisl import waterworld_v3
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
@@ -9,7 +9,7 @@
 # RDQN - Rainbow DQN
 # ADQN - Apex DQN
 def env_creator(args):
-    return PettingZooEnv(waterworld_v2.env())
+    return PettingZooEnv(waterworld_v3.env())
 
 env = env_creator({})
 register_env("waterworld", env_creator)
diff --git a/rllib/examples/multi_agent_parameter_sharing.py b/rllib/examples/multi_agent_parameter_sharing.py
index 5dae069237fd..a4ca7aa2f17b 100644
--- a/rllib/examples/multi_agent_parameter_sharing.py
+++ b/rllib/examples/multi_agent_parameter_sharing.py
@@ -1,7 +1,7 @@
 from ray import tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v0
+from pettingzoo.sisl import waterworld_v3
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
@@ -9,7 +9,7 @@
 # RDQN - Rainbow DQN
 # ADQN - Apex DQN
 
-    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v0.env()))
+    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))
 
     tune.run(
         "APEX_DDPG",
diff --git a/rllib/tests/test_pettingzoo_env.py b/rllib/tests/test_pettingzoo_env.py
index f2179e8173d4..140d3104e852 100644
--- a/rllib/tests/test_pettingzoo_env.py
+++ b/rllib/tests/test_pettingzoo_env.py
@@ -7,7 +7,7 @@
 from ray.rllib.env import PettingZooEnv
 from ray.rllib.agents.registry import get_trainer_class
 
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
 from pettingzoo.mpe import simple_spread_v2
 from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
 
@@ -19,9 +19,9 @@ def setUp(self) -> None:
     def tearDown(self) -> None:
         ray.shutdown()
 
-    def test_pettingzoo_pistonball_v4_policies_are_dict_env(self):
+    def test_pettingzoo_pistonball_v5_policies_are_dict_env(self):
         def env_creator(config):
-            env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+            env = pistonball_v5.env()
             env = dtype_v0(env, dtype=float32)
             env = color_reduction_v0(env, mode="R")
             env = normalize_obs_v0(env)

From 497e44cdba6b213dc71a871001f8620cb56c1bd4 Mon Sep 17 00:00:00 2001
From: sven1977
Date: Mon, 3 Jan 2022 12:51:51 +0100
Subject: [PATCH 2/4] wip.

---
 python/requirements/ml/requirements_rllib.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index f3585627317b..c84158ffc67f 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -11,7 +11,8 @@ kaggle_environments==1.7.11
 # Unity3D testing
 mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
-pettingzoo==1.14.0
+pettingzoo==1.14.0; python_version >= '3.7'
+pettingzoo==1.12.0; python_version < '3.7'
 pymunk==6.0.0
 supersuit==3.3.2
 # For testing in MuJoCo-like envs (in PyBullet).

From 701763cbbad7e76efe1d06177e67b1f321c2a97a Mon Sep 17 00:00:00 2001
From: sven1977
Date: Tue, 4 Jan 2022 09:01:08 +0100
Subject: [PATCH 3/4] wip.

---
 python/requirements/ml/requirements_rllib.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index c84158ffc67f..e0faff262019 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -12,7 +12,6 @@ kaggle_environments==1.7.11
 mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
 pettingzoo==1.14.0; python_version >= '3.7'
-pettingzoo==1.12.0; python_version < '3.7'
 pymunk==6.0.0
 supersuit==3.3.2
 # For testing in MuJoCo-like envs (in PyBullet).

From 9c3ca4e8f98c7fa940104192133b0d824c4b9f6f Mon Sep 17 00:00:00 2001
From: sven1977
Date: Tue, 4 Jan 2022 11:38:03 +0100
Subject: [PATCH 4/4] fix.

---
 python/requirements/ml/requirements_rllib.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/requirements_rllib.txt
index e0faff262019..60020710848e 100644
--- a/python/requirements/ml/requirements_rllib.txt
+++ b/python/requirements/ml/requirements_rllib.txt
@@ -13,7 +13,7 @@ mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
 pettingzoo==1.14.0; python_version >= '3.7'
 pymunk==6.0.0
-supersuit==3.3.2
+supersuit==3.3.2; python_version >= '3.7'
 # For testing in MuJoCo-like envs (in PyBullet).
 pybullet==3.2.0
 # For tests on RecSim and Kaggle envs.
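
For reference, a minimal usage sketch of the wrapper as updated by this series. It assumes the pins above (pettingzoo==1.14.0, supersuit==3.3.2) and an RLlib of the same vintage; the waterworld registration mirrors rllib/examples/multi_agent_independent_learning.py, while the trainer name and config values below are illustrative placeholders, not settings taken from the patch.

    from ray import tune
    from ray.tune.registry import register_env
    from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
    from pettingzoo.sisl import waterworld_v3

    # The updated wrapper resets the env itself in __init__ and reads the
    # per-agent spaces via env.observation_space(agent) /
    # env.action_space(agent), so the raw AEC env is handed over directly.
    register_env("waterworld",
                 lambda config: PettingZooEnv(waterworld_v3.env()))

    tune.run(
        "PPO",  # illustrative; the patched examples use APEX_DDPG as well
        stop={"episodes_total": 100},
        config={
            "env": "waterworld",
            "framework": "torch",
            "num_workers": 1,
        },
    )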