[RLlib] Remove Bandits RecSim example for now, since it doesn't really work.

Revert "[RLlib] Add an env wrapper so RecSim works with our Bandits agent. (ray-project#22028)". This reverts commit 9c95b9a.
gjoliver · Feb 9, 2022 · b72dc5b · b72dc5b
1 parent d06317e
commit b72dc5b
Show file tree

Hide file tree

Showing 4 changed files with 2 additions and 127 deletions.
diff --git a/rllib/BUILD b/rllib/BUILD
@@ -2840,14 +2840,6 @@ py_test(
     srcs = ["examples/bandit/tune_lin_ucb_train_recommendation.py"],
 )
 
-py_test(
-    name = "examples/bandit/tune_lin_ucb_train_recsim_env",
-    main = "examples/bandit/tune_lin_ucb_train_recsim_env.py",
-    tags = ["team:ml", "examples", ],
-    size = "small",
-    srcs = ["examples/bandit/tune_lin_ucb_train_recsim_env.py"],
-)
-
 # --------------------------------------------------------------------
 # examples/documentation directory
 #

diff --git a/rllib/env/wrappers/recsim.py b/rllib/env/wrappers/recsim.py
@@ -64,49 +64,6 @@ def observation(self, obs):
         return new_obs
 
 
-class RecSimObservationBanditWrapper(gym.ObservationWrapper):
-    """Fix RecSim environment's observation format
-
-    RecSim's observations are keyed by document IDs, and nested under
-    "doc" key.
-    Our Bandits agent expects the observations to be flat 2D array
-    and under "item" key.
-
-    This environment wrapper converts obs into the right format.
-    """
-
-    def __init__(self, env: gym.Env):
-        super().__init__(env)
-        obs_space = self.env.observation_space
-
-        num_items = len(obs_space["doc"])
-        embedding_dim = next(iter(obs_space["doc"].values())).shape[-1]
-        self.observation_space = Dict(
-            OrderedDict(
-                [
-                    ("user", obs_space["user"]),
-                    (
-                        "item",
-                        gym.spaces.Box(
-                            low=-np.ones((num_items, embedding_dim)),
-                            high=np.ones((num_items, embedding_dim)),
-                        ),
-                    ),
-                    ("response", obs_space["response"]),
-                ]
-            )
-        )
-        self._sampled_obs = self.observation_space.sample()
-
-    def observation(self, obs):
-        new_obs = OrderedDict()
-        new_obs["user"] = obs["user"]
-        new_obs["item"] = np.vstack(list(obs["doc"].values()))
-        new_obs["response"] = obs["response"]
-        new_obs = convert_element_to_space_type(new_obs, self._sampled_obs)
-        return new_obs
-
-
 class RecSimResetWrapper(gym.Wrapper):
     """Fix RecSim environment's reset() and close() function
 
@@ -160,9 +117,7 @@ def action(self, action: int) -> List[int]:
 
 
 def recsim_gym_wrapper(
-    recsim_gym_env: gym.Env,
-    convert_to_discrete_action_space: bool = False,
-    wrap_for_bandits: bool = False,
+    recsim_gym_env: gym.Env, convert_to_discrete_action_space: bool = False
 ) -> gym.Env:
     """Makes sure a RecSim gym.Env can be handled by RLlib.
 
@@ -186,8 +141,6 @@ def recsim_gym_wrapper(
             such as RLlib's DQN. If None, `convert_to_discrete_action_space`
             may also be provided via the EnvContext (config) when creating an
             actual env instance.
-        wrap_for_bandits: Bool indicating, whether this RecSim env should be
-            wrapped for use with our Bandits agent.
 
     Returns:
         An RLlib-ready gym.Env instance.
@@ -196,8 +149,6 @@ def recsim_gym_wrapper(
     env = RecSimObservationSpaceWrapper(env)
     if convert_to_discrete_action_space:
         env = MultiDiscreteToDiscreteActionWrapper(env)
-    if wrap_for_bandits:
-        env = RecSimObservationBanditWrapper(env)
     return env
 
 
@@ -235,7 +186,6 @@ def __init__(self, config: Optional[EnvContext] = None):
                 "resample_documents": True,
                 "seed": 0,
                 "convert_to_discrete_action_space": False,
-                "wrap_for_bandits": False,
             }
             if config is None or isinstance(config, dict):
                 config = EnvContext(config or default_config, worker_index=0)
@@ -260,9 +210,7 @@ def __init__(self, config: Optional[EnvContext] = None):
             # Fix observation space and - if necessary - convert to discrete
             # action space (from multi-discrete).
             env = recsim_gym_wrapper(
-                gym_env,
-                config["convert_to_discrete_action_space"],
-                config["wrap_for_bandits"],
+                gym_env, env_ctx["convert_to_discrete_action_space"]
             )
             # Call the super (Wrapper constructor) passing it the created env.
             super().__init__(env=env)

diff --git a/rllib/env/wrappers/tests/test_recsim_wrapper.py b/rllib/env/wrappers/tests/test_recsim_wrapper.py
@@ -28,11 +28,6 @@ def test_action_space_conversion(self):
         new_obs, _, _, _ = env.step(action)
         self.assertTrue(env.observation_space.contains(new_obs))
 
-    def test_bandits_observation_space_conversion(self):
-        env = InterestEvolutionRecSimEnv({"wrap_for_bandits": True})
-        # "item" of observation space is a Box space.
-        self.assertIsInstance(env.observation_space["item"], gym.spaces.Box)
-
     def test_double_action_space_conversion_raises_exception(self):
         env = InterestEvolutionRecSimEnv({"convert_to_discrete_action_space": True})
         with self.assertRaises(UnsupportedSpaceException):

diff --git a/rllib/examples/bandit/tune_lin_ucb_train_recsim_env.py b/rllib/examples/bandit/tune_lin_ucb_train_recsim_env.py