Skip to content

Commit

Permalink
[RLlib] Remove Bandits RecSim example for now, since it doesn't reall…
Browse files Browse the repository at this point in the history
…y work.

Revert "[RLlib] Add an env wrapper so RecSim works with our Bandits agent. (ray-project#22028)"

This reverts commit 9c95b9a.
  • Loading branch information
Jun Gong committed Feb 9, 2022
1 parent d06317e commit b72dc5b
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 127 deletions.
8 changes: 0 additions & 8 deletions rllib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2840,14 +2840,6 @@ py_test(
srcs = ["examples/bandit/tune_lin_ucb_train_recommendation.py"],
)

py_test(
name = "examples/bandit/tune_lin_ucb_train_recsim_env",
main = "examples/bandit/tune_lin_ucb_train_recsim_env.py",
tags = ["team:ml", "examples", ],
size = "small",
srcs = ["examples/bandit/tune_lin_ucb_train_recsim_env.py"],
)

# --------------------------------------------------------------------
# examples/documentation directory
#
Expand Down
56 changes: 2 additions & 54 deletions rllib/env/wrappers/recsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,49 +64,6 @@ def observation(self, obs):
return new_obs


class RecSimObservationBanditWrapper(gym.ObservationWrapper):
    """Reshape RecSim observations into the layout the Bandits agent reads.

    RecSim nests its per-document observations under the "doc" key, keyed
    by document ID. The Bandits agent instead expects a flat 2D array stored
    under the "item" key. This wrapper performs that conversion; the "user"
    and "response" entries are passed through as they are.
    """

    def __init__(self, env: gym.Env):
        super().__init__(env)
        wrapped_space = self.env.observation_space
        doc_space = wrapped_space["doc"]

        # One row per document; the column count is read from the last
        # dimension of the first document's space.
        item_shape = (
            len(doc_space),
            next(iter(doc_space.values())).shape[-1],
        )
        item_space = gym.spaces.Box(
            low=-np.ones(item_shape),
            high=np.ones(item_shape),
        )

        self.observation_space = Dict(
            OrderedDict(
                [
                    ("user", wrapped_space["user"]),
                    ("item", item_space),
                    ("response", wrapped_space["response"]),
                ]
            )
        )
        # Sample kept as the reference argument handed to
        # `convert_element_to_space_type` on every observation.
        self._sampled_obs = self.observation_space.sample()

    def observation(self, obs):
        """Return `obs` with its "doc" entries stacked under "item"."""
        converted = OrderedDict(
            [
                ("user", obs["user"]),
                ("item", np.vstack(list(obs["doc"].values()))),
                ("response", obs["response"]),
            ]
        )
        return convert_element_to_space_type(converted, self._sampled_obs)


class RecSimResetWrapper(gym.Wrapper):
"""Fix RecSim environment's reset() and close() function
Expand Down Expand Up @@ -160,9 +117,7 @@ def action(self, action: int) -> List[int]:


def recsim_gym_wrapper(
recsim_gym_env: gym.Env,
convert_to_discrete_action_space: bool = False,
wrap_for_bandits: bool = False,
recsim_gym_env: gym.Env, convert_to_discrete_action_space: bool = False
) -> gym.Env:
"""Makes sure a RecSim gym.Env can be handled by RLlib.
Expand All @@ -186,8 +141,6 @@ def recsim_gym_wrapper(
such as RLlib's DQN. If None, `convert_to_discrete_action_space`
may also be provided via the EnvContext (config) when creating an
actual env instance.
wrap_for_bandits: Bool indicating, whether this RecSim env should be
wrapped for use with our Bandits agent.
Returns:
An RLlib-ready gym.Env instance.
Expand All @@ -196,8 +149,6 @@ def recsim_gym_wrapper(
env = RecSimObservationSpaceWrapper(env)
if convert_to_discrete_action_space:
env = MultiDiscreteToDiscreteActionWrapper(env)
if wrap_for_bandits:
env = RecSimObservationBanditWrapper(env)
return env


Expand Down Expand Up @@ -235,7 +186,6 @@ def __init__(self, config: Optional[EnvContext] = None):
"resample_documents": True,
"seed": 0,
"convert_to_discrete_action_space": False,
"wrap_for_bandits": False,
}
if config is None or isinstance(config, dict):
config = EnvContext(config or default_config, worker_index=0)
Expand All @@ -260,9 +210,7 @@ def __init__(self, config: Optional[EnvContext] = None):
# Fix observation space and - if necessary - convert to discrete
# action space (from multi-discrete).
env = recsim_gym_wrapper(
gym_env,
config["convert_to_discrete_action_space"],
config["wrap_for_bandits"],
gym_env, env_ctx["convert_to_discrete_action_space"]
)
# Call the super (Wrapper constructor) passing it the created env.
super().__init__(env=env)
Expand Down
5 changes: 0 additions & 5 deletions rllib/env/wrappers/tests/test_recsim_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ def test_action_space_conversion(self):
new_obs, _, _, _ = env.step(action)
self.assertTrue(env.observation_space.contains(new_obs))

def test_bandits_observation_space_conversion(self):
    """With `wrap_for_bandits` on, the "item" observation space is a Box."""
    bandit_env = InterestEvolutionRecSimEnv({"wrap_for_bandits": True})
    item_space = bandit_env.observation_space["item"]
    self.assertIsInstance(item_space, gym.spaces.Box)

def test_double_action_space_conversion_raises_exception(self):
env = InterestEvolutionRecSimEnv({"convert_to_discrete_action_space": True})
with self.assertRaises(UnsupportedSpaceException):
Expand Down
60 changes: 0 additions & 60 deletions rllib/examples/bandit/tune_lin_ucb_train_recsim_env.py

This file was deleted.

0 comments on commit b72dc5b

Please sign in to comment.