[RLlib] Revert "Revert "updated pettingzoo wrappers, env versions, urls"" #21339

Merged
merged 5 commits on Jan 4, 2022
4 changes: 3 additions & 1 deletion .buildkite/pipeline.gpu.large.yml
@@ -22,7 +22,9 @@
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
- RLLIB_TESTING=1 ./ci/travis/install-dependencies.sh
- RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
# Because Python version changed, we need to re-install Ray here
- rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/travis/ci.sh build
- pip install -Ur ./python/requirements_ml_docker.txt
- ./ci/travis/env_info.sh
# --jobs 2 is necessary as we only need to have at least 2 gpus on the machine
6 changes: 3 additions & 3 deletions doc/source/rllib-env.rst
@@ -213,17 +213,17 @@ To scale to hundreds of agents, MultiAgentEnv batches policy evaluations across
PettingZoo Multi-Agent Environments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

`PettingZoo <https://github.com/PettingZoo-Team/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
`PettingZoo <https://github.com/Farama-Foundation/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example

.. code-block:: python

from ray.tune.registry import register_env
# import the pettingzoo environment
from pettingzoo.butterfly import prison_v2
from pettingzoo.butterfly import prison_v3
# import rllib pettingzoo interface
from ray.rllib.env import PettingZooEnv
# define how to make the environment. This way takes an optional environment config, num_floors
env_creator = lambda config: prison_v2.env(num_floors=config.get("num_floors", 4))
env_creator = lambda config: prison_v3.env(num_floors=config.get("num_floors", 4))
# register that way to make the environment under an rllib name
register_env('prison', lambda config: PettingZooEnv(env_creator(config)))
# now you can use `prison` as an environment
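Once registered, "prison" can be used like any built-in RLlib environment. A minimal, hedged sketch of training on it (the PPO choice, stopping criterion, and other config values are illustrative and not part of this PR; prison's image observations may additionally need a custom model or SuperSuit resizing in practice):

    from ray import tune

    # assumes `register_env('prison', ...)` from the snippet above has already run
    tune.run(
        "PPO",
        stop={"training_iteration": 5},
        config={
            "env": "prison",
            "env_config": {"num_floors": 4},
            "framework": "torch",
            "num_workers": 1,
        },
    )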
4 changes: 2 additions & 2 deletions python/requirements/ml/requirements_rllib.txt
@@ -11,9 +11,9 @@ kaggle_environments==1.7.11
# Unity3D testing
mlagents_envs==0.27.0
# For tests on PettingZoo's multi-agent envs.
pettingzoo==1.11.1
pettingzoo==1.14.0; python_version >= '3.7'
pymunk==6.0.0
supersuit==2.6.6
supersuit==3.3.2; python_version >= '3.7'
# For testing in MuJoCo-like envs (in PyBullet).
pybullet==3.2.0
# For tests on RecSim and Kaggle envs.
4 changes: 2 additions & 2 deletions rllib/env/tests/test_remote_worker_envs.py
@@ -1,6 +1,6 @@
import gym
import numpy as np
from pettingzoo.butterfly import pistonball_v4
from pettingzoo.butterfly import pistonball_v5
from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
import unittest

@@ -15,7 +15,7 @@

# Function that outputs the environment you wish to register.
def env_creator(config):
env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
env = pistonball_v5.env()
env = dtype_v0(env, dtype=np.float32)
env = color_reduction_v0(env, mode="R")
env = normalize_obs_v0(env)
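For context, an env_creator like the one above is usually exercised with remote sub-environments turned on, which is what this test targets. A hedged sketch (the registered name and config values are illustrative, not the test's actual ones):

    from ray.tune.registry import register_env
    from ray.rllib.env import PettingZooEnv

    # wrap the creator defined above and register it under an RLlib name
    register_env("pistonball", lambda config: PettingZooEnv(env_creator(config)))

    config = {
        "env": "pistonball",
        # run each sub-environment in its own Ray actor
        "remote_worker_envs": True,
        "num_envs_per_worker": 2,
    }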
64 changes: 29 additions & 35 deletions rllib/env/wrappers/pettingzoo_env.py
@@ -4,7 +4,7 @@
class PettingZooEnv(MultiAgentEnv):
"""An interface to the PettingZoo MARL environment library.

See: https://github.com/PettingZoo-Team/PettingZoo
See: https://github.com/Farama-Foundation/PettingZoo

Inherits from MultiAgentEnv and exposes a given AEC
(actor-environment-cycle) game from the PettingZoo project via the
@@ -15,16 +15,16 @@ class PettingZooEnv(MultiAgentEnv):
1. All agents have the same action_spaces and observation_spaces.
Note: If, within your aec game, agents do not have homogeneous action /
observation spaces, apply SuperSuit wrappers
to apply padding functionality: https://github.com/PettingZoo-Team/
to apply padding functionality: https://github.com/Farama-Foundation/
SuperSuit#built-in-multi-agent-only-functions
2. Environments are positive sum games (-> Agents are expected to cooperate
to maximize reward). This isn't a hard restriction, it just that
standard algorithms aren't expected to work well in highly competitive
games.

Examples:
>>> from pettingzoo.butterfly import prison_v2
>>> env = PettingZooEnv(prison_v2.env())
>>> from pettingzoo.butterfly import prison_v3
>>> env = PettingZooEnv(prison_v3.env())
>>> obs = env.reset()
>>> print(obs)
# only returns the observation for the agent which should be stepping
@@ -68,34 +68,26 @@ class PettingZooEnv(MultiAgentEnv):

def __init__(self, env):
self.env = env
# agent idx list
self.agents = self.env.possible_agents

# Get dictionaries of obs_spaces and act_spaces
self.observation_spaces = self.env.observation_spaces
self.action_spaces = self.env.action_spaces
env.reset()

# Get first observation space, assuming all agents have equal space
self.observation_space = self.observation_spaces[self.agents[0]]
self.observation_space = self.env.observation_space(self.env.agents[0])

# Get first action space, assuming all agents have equal space
self.action_space = self.action_spaces[self.agents[0]]
self.action_space = self.env.action_space(self.env.agents[0])

assert all(obs_space == self.observation_space
for obs_space
in self.env.observation_spaces.values()), \
assert all(self.env.observation_space(agent) == self.observation_space
for agent in self.env.agents), \
"Observation spaces for all agents must be identical. Perhaps " \
"SuperSuit's pad_observations wrapper can help (useage: " \
"`supersuit.aec_wrappers.pad_observations(env)`"

assert all(act_space == self.action_space
for act_space in self.env.action_spaces.values()), \
assert all(self.env.action_space(agent) == self.action_space
for agent in self.env.agents), \
"Action spaces for all agents must be identical. Perhaps " \
"SuperSuit's pad_action_space wrapper can help (useage: " \
"SuperSuit's pad_action_space wrapper can help (usage: " \
"`supersuit.aec_wrappers.pad_action_space(env)`"

self.reset()

def reset(self):
self.env.reset()
return {
@@ -135,38 +135,36 @@ def seed(self, seed=None):
def render(self, mode="human"):
return self.env.render(mode)

@property
def get_sub_environments(self):
return self.env.unwrapped


class ParallelPettingZooEnv(MultiAgentEnv):
def __init__(self, env):
self.par_env = env
# agent idx list
self.agents = self.par_env.possible_agents

# Get dictionaries of obs_spaces and act_spaces
self.observation_spaces = self.par_env.observation_spaces
self.action_spaces = self.par_env.action_spaces
self.par_env.reset()

# Get first observation space, assuming all agents have equal space
self.observation_space = self.observation_spaces[self.agents[0]]
self.observation_space = self.par_env.observation_space(
self.par_env.agents[0])

# Get first action space, assuming all agents have equal space
self.action_space = self.action_spaces[self.agents[0]]
self.action_space = self.par_env.action_space(self.par_env.agents[0])

assert all(obs_space == self.observation_space
for obs_space
in self.par_env.observation_spaces.values()), \
assert all(
self.par_env.observation_space(agent) == self.observation_space
for agent in self.par_env.agents), \
"Observation spaces for all agents must be identical. Perhaps " \
"SuperSuit's pad_observations wrapper can help (useage: " \
"`supersuit.aec_wrappers.pad_observations(env)`"

assert all(act_space == self.action_space
for act_space in self.par_env.action_spaces.values()), \
assert all(self.par_env.action_space(agent) == self.action_space
for agent in self.par_env.agents), \
"Action spaces for all agents must be identical. Perhaps " \
"SuperSuit's pad_action_space wrapper can help (useage: " \
"`supersuit.aec_wrappers.pad_action_space(env)`"

self.reset()

def reset(self):
return self.par_env.reset()

@@ -183,3 +183,7 @@ def seed(self, seed=None):

def render(self, mode="human"):
return self.par_env.render(mode)

@property
def unwrapped(self):
return self.par_env.unwrapped
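The wrapper now reads spaces through PettingZoo's per-agent observation_space(agent) / action_space(agent) accessors rather than the observation_spaces / action_spaces dictionaries that newer PettingZoo releases move away from. A hedged sketch of exercising the updated AEC wrapper by hand (environment choice and random actions are illustrative only):

    from pettingzoo.butterfly import prison_v3
    from ray.rllib.env import PettingZooEnv

    env = PettingZooEnv(prison_v3.env(num_floors=4))
    obs = env.reset()  # dict keyed by the single agent whose turn it is
    dones = {"__all__": False}
    while not dones["__all__"]:
        # sample a random action for the currently acting agent only
        actions = {agent_id: env.action_space.sample() for agent_id in obs}
        obs, rewards, dones, infos = env.step(actions)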
4 changes: 2 additions & 2 deletions rllib/examples/multi_agent_independent_learning.py
@@ -1,15 +1,15 @@
from ray import tune
from ray.tune.registry import register_env
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
from pettingzoo.sisl import waterworld_v2
from pettingzoo.sisl import waterworld_v3

# Based on code from github.com/parametersharingmadrl/parametersharingmadrl

if __name__ == "__main__":
# RDQN - Rainbow DQN
# ADQN - Apex DQN
def env_creator(args):
return PettingZooEnv(waterworld_v2.env())
return PettingZooEnv(waterworld_v3.env())

env = env_creator({})
register_env("waterworld", env_creator)
4 changes: 2 additions & 2 deletions rllib/examples/multi_agent_parameter_sharing.py
@@ -1,15 +1,15 @@
from ray import tune
from ray.tune.registry import register_env
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
from pettingzoo.sisl import waterworld_v0
from pettingzoo.sisl import waterworld_v3

# Based on code from github.com/parametersharingmadrl/parametersharingmadrl

if __name__ == "__main__":
# RDQN - Rainbow DQN
# ADQN - Apex DQN

register_env("waterworld", lambda _: PettingZooEnv(waterworld_v0.env()))
register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))

tune.run(
"APEX_DDPG",
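The multiagent part of the config is truncated above. As a hedged sketch, parameter sharing in RLlib amounts to mapping every PettingZoo agent onto a single policy; the policy name and spaces below are illustrative, not this file's exact contents:

    env = PettingZooEnv(waterworld_v3.env())
    policies = {
        # one shared policy; spaces come from the wrapped env
        "shared_policy": (None, env.observation_space, env.action_space, {}),
    }
    config = {
        "env": "waterworld",
        "multiagent": {
            "policies": policies,
            "policy_mapping_fn": lambda agent_id, *args, **kwargs: "shared_policy",
        },
    }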
6 changes: 3 additions & 3 deletions rllib/tests/test_pettingzoo_env.py
@@ -7,7 +7,7 @@
from ray.rllib.env import PettingZooEnv
from ray.rllib.agents.registry import get_trainer_class

from pettingzoo.butterfly import pistonball_v4
from pettingzoo.butterfly import pistonball_v5
from pettingzoo.mpe import simple_spread_v2
from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0

@@ -19,9 +19,9 @@ def setUp(self) -> None:
def tearDown(self) -> None:
ray.shutdown()

def test_pettingzoo_pistonball_v4_policies_are_dict_env(self):
def test_pettingzoo_pistonball_v5_policies_are_dict_env(self):
def env_creator(config):
env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
env = pistonball_v5.env()
env = dtype_v0(env, dtype=float32)
env = color_reduction_v0(env, mode="R")
env = normalize_obs_v0(env)
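The rest of the test is cut off above. A hedged smoke check in the same spirit, assuming the env_creator defined in this hunk returns the SuperSuit-wrapped AEC env (the assertion is illustrative, not the test's actual code):

    env = PettingZooEnv(env_creator({}))
    obs = env.reset()
    # the wrapper returns observations keyed by the agent that acts next
    assert isinstance(obs, dict) and len(obs) == 1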