[RLlib] Cleanup examples folder #16: Add missing docstrings to 2 connector example scripts. #45864

Merged
52 changes: 27 additions & 25 deletions rllib/BUILD
@@ -2086,6 +2086,27 @@ py_test(
 # tagged by @OldAPIStack and/or @HybridAPIStack
 # ----------------------

+# subdirectory: actions/
+
+# Nested action spaces (flattening obs and learning w/ multi-action distribution).
+py_test(
+    name = "examples/actions/nested_action_spaces_ppo",
+    main = "examples/actions/nested_action_spaces.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "large",
+    srcs = ["examples/actions/nested_action_spaces.py"],
+    args = ["--enable-new-api-stack", "--as-test", "--framework=torch", "--stop-reward=-500.0", "--algo=PPO"]
+)
+
+py_test(
+    name = "examples/actions/nested_action_spaces_multi_agent_ppo",
+    main = "examples/actions/nested_action_spaces.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "large",
+    srcs = ["examples/actions/nested_action_spaces.py"],
+    args = ["--enable-new-api-stack", "--as-test", "--num-agents=2", "--framework=torch", "--stop-reward=-1000.0", "--algo=PPO"]
+)
+
 # subdirectory: algorithms/

 #@OldAPIStack
@@ -2213,41 +2234,22 @@ py_test(
args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iter=2", "--framework=torch", "--algo=PPO", "--num-env-runners=4", "--num-cpus=6"]
)

# Nested action spaces (flattening obs and learning w/ multi-action distribution).
py_test(
name = "examples/connectors/nested_action_spaces_ppo",
main = "examples/connectors/nested_action_spaces.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "large",
srcs = ["examples/connectors/nested_action_spaces.py"],
args = ["--enable-new-api-stack", "--as-test", "--framework=torch", "--stop-reward=-500.0", "--algo=PPO"]
)

py_test(
name = "examples/connectors/nested_action_spaces_multi_agent_ppo",
main = "examples/connectors/nested_action_spaces.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "large",
srcs = ["examples/connectors/nested_action_spaces.py"],
args = ["--enable-new-api-stack", "--as-test", "--num-agents=2", "--framework=torch", "--stop-reward=-1000.0", "--algo=PPO"]
)

# Nested observation spaces (flattening).
py_test(
name = "examples/connectors/nested_observation_spaces_ppo",
main = "examples/connectors/nested_observation_spaces.py",
name = "examples/connectors/flatten_observations_dict_space_ppo",
main = "examples/connectors/flatten_observations_dict_space.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "medium",
srcs = ["examples/connectors/nested_observation_spaces.py"],
srcs = ["examples/connectors/flatten_observations_dict_space.py"],
args = ["--enable-new-api-stack", "--as-test", "--stop-reward=400.0", "--framework=torch", "--algo=PPO"]
)

py_test(
name = "examples/connectors/nested_observation_spaces_multi_agent_ppo",
main = "examples/connectors/nested_observation_spaces.py",
name = "examples/connectors/flatten_observations_dict_space_multi_agent_ppo",
main = "examples/connectors/flatten_observations_dict_space.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "medium",
srcs = ["examples/connectors/nested_observation_spaces.py"],
srcs = ["examples/connectors/flatten_observations_dict_space.py"],
args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=800.0", "--framework=torch", "--algo=PPO"]
)

2 changes: 1 addition & 1 deletion rllib/connectors/connector_pipeline_v2.py
@@ -312,7 +312,7 @@ def _fix_spaces(self):
         obs_space = self.input_observation_space
         act_space = self.input_action_space
         for con in self.connectors:
-            con.input_observation_space = obs_space
             con.input_action_space = act_space
+            con.input_observation_space = obs_space
             obs_space = con.observation_space
             act_space = con.action_space
3 changes: 2 additions & 1 deletion rllib/connectors/connector_v2.py
@@ -84,8 +84,9 @@ def __init__(
         self._action_space = None
         self._input_observation_space = None
         self._input_action_space = None
-        self.input_observation_space = input_observation_space
+
         self.input_action_space = input_action_space
+        self.input_observation_space = input_observation_space

     @OverrideToImplementCustomLogic
     def recompute_observation_space_from_input_spaces(self) -> gym.Space:
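Both reorderings above (in `ConnectorPipelineV2._fix_spaces()` and in `ConnectorV2.__init__()`) apply the same rule: assign the input action space before the input observation space. Setting `input_observation_space` can trigger `recompute_observation_space_from_input_spaces()`, and a connector's recompute may need to read its input action space, which under the old order would still be `None`. A minimal standalone sketch of that dependency (hypothetical class, not RLlib code):

import gymnasium as gym

class PrevActionIntoObs:
    """Toy connector whose output obs space depends on its input ACTION space.

    Hypothetical example for illustration only: it appends a one-hot encoding
    of the previous action to the observation vector.
    """

    def __init__(self):
        self.input_action_space = None
        self._input_observation_space = None
        self.observation_space = None

    @property
    def input_observation_space(self):
        return self._input_observation_space

    @input_observation_space.setter
    def input_observation_space(self, space):
        self._input_observation_space = space
        # Recomputing the output obs space reads the input *action* space; if
        # the obs space were assigned first, this would still be None here.
        n_actions = self.input_action_space.n
        self.observation_space = gym.spaces.Box(
            -1.0, 1.0, shape=(space.shape[0] + n_actions,)
        )

con = PrevActionIntoObs()
con.input_action_space = gym.spaces.Discrete(4)  # Must be assigned first.
con.input_observation_space = gym.spaces.Box(-1.0, 1.0, shape=(3,))
assert con.observation_space.shape == (7,)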
4 changes: 2 additions & 2 deletions rllib/connectors/env_to_module/__init__.py
@@ -14,7 +14,7 @@
     FlattenObservations,
 )
 from ray.rllib.connectors.env_to_module.prev_actions_prev_rewards import (
-    PrevActionsPrevRewardsConnector,
+    PrevActionsPrevRewards,
 )
 from ray.rllib.connectors.env_to_module.write_observations_to_episodes import (
     WriteObservationsToEpisodes,
@@ -29,6 +29,6 @@
"EnvToModulePipeline",
"FlattenObservations",
"NumpyToTensor",
"PrevActionsPrevRewardsConnector",
"PrevActionsPrevRewards",
"WriteObservationsToEpisodes",
]
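The rename drops the redundant "Connector" suffix from the class name. Any downstream code importing the old symbol from this package needs the new name:

# New name exported from ray.rllib.connectors.env_to_module:
from ray.rllib.connectors.env_to_module import PrevActionsPrevRewards

# Old name, no longer re-exported by this __init__.py:
# from ray.rllib.connectors.env_to_module import PrevActionsPrevRewardsConnector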
114 changes: 56 additions & 58 deletions rllib/connectors/env_to_module/flatten_observations.py
@@ -6,7 +6,6 @@
 import tree  # pip install dm_tree

 from ray.rllib.connectors.connector_v2 import ConnectorV2
-from ray.rllib.core.columns import Columns
 from ray.rllib.core.rl_module.rl_module import RLModule
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.numpy import flatten_inputs_to_1d_tensor
@@ -19,25 +18,20 @@
 class FlattenObservations(ConnectorV2):
     """A connector piece that flattens all observation components into a 1D array.

-    - Only works on data that has already been added to the batch.
-    - This connector makes the assumption that under the Columns.OBS key in batch,
-    there is either a list of individual env observations to be flattened (single-agent
-    case) or a dict mapping agent- and module IDs to lists of data items to be
-    flattened (multi-agent case).
-    - Does NOT work in a Learner pipeline as it operates on individual observation
-    items (as opposed to batched/time-ranked data).
-    - Therefore, assumes that the altered (flattened) observations will be written
-    back into the episode by a later connector piece in the env-to-module pipeline
-    (which this piece is part of as well).
-    - Does NOT read any information from the given list of Episode objects.
-    - Does NOT write any observations (or other data) to the given Episode objects.
+    - Works directly on the incoming episodes list and changes the last observation
+    in-place (writes the flattened observation back into the episode).
+    - This connector does NOT alter the incoming batch (`data`) when called.
+    - This connector does NOT work in a `LearnerConnectorPipeline` because it requires
+    the incoming episodes to still be ongoing (in progress) as it only alters the
+    latest observation, not all observations in an episode.

     .. testcode::

         import gymnasium as gym
         import numpy as np

         from ray.rllib.connectors.env_to_module import FlattenObservations
+        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
         from ray.rllib.utils.test_utils import check

         # Some arbitrarily nested, complex observation space.
@@ -51,48 +45,50 @@ class FlattenObservations(ConnectorV2):
         })
         act_space = gym.spaces.Discrete(2)

-        # A batch of two example items, both coming from the above defined observation
-        # space.
-        batch = {
-            "obs": [
-                # 1st example item.
+        # Two example episodes, both with initial (reset) observations coming from the
+        # above defined observation space.
+        episode_1 = SingleAgentEpisode(
+            observations=[
                 {
                     "a": np.array(-10.0, np.float32),
                     "b": (1, np.array([[-1.0], [-1.0]], np.float32)),
                     "c": np.array([0, 2]),
                 },
-                # 2nd example item.
+            ],
+        )
+        episode_2 = SingleAgentEpisode(
+            observations=[
                 {
                     "a": np.array(10.0, np.float32),
                     "b": (0, np.array([[1.0], [1.0]], np.float32)),
                     "c": np.array([1, 1]),
                 },
             ],
-        }
+        )

         # Construct our connector piece.
         connector = FlattenObservations(obs_space, act_space)

         # Call our connector piece with the example data.
         output_data = connector(
             rl_module=None,  # This connector works without an RLModule.
-            data=batch,
-            episodes=[],  # This connector does not need the `episodes` input.
+            data={},  # This connector does not alter any data.
+            episodes=[episode_1, episode_2],
             explore=True,
             shared_data={},
         )

-        # The connector does not change the number of items in the data (still 2 items).
-        check(len(output_data["obs"]), 2)
+        # The connector does not alter the data and acts as pure pass-through.
+        check(output_data, {})

-        # The connector has flattened each item in the data to a 1D tensor.
+        # The connector has flattened each item in the episodes to a 1D tensor.
         check(
-            output_data["obs"][0],
+            episode_1.get_observations(0),
             # box() disc(2). box(2, 1). multidisc(2, 3)........
             np.array([-10.0, 0.0, 1.0, -1.0, -1.0, 1.0, 0.0, 0.0, 0.0, 1.0]),
         )
         check(
-            output_data["obs"][1],
+            episode_2.get_observations(0),
             # box() disc(2). box(2, 1). multidisc(2, 3)........
             np.array([10.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]),
         )
@@ -169,40 +165,42 @@ def __call__(
         shared_data: Optional[dict] = None,
         **kwargs,
     ) -> Any:
-        observations = data.get(Columns.OBS)
-
-        if observations is None:
-            raise ValueError(
-                f"`batch` must already have a column named {Columns.OBS} in it "
-                f"for this connector to work!"
-            )
-
-        # Process each item under the Columns.OBS key individually and flatten
-        # it. We are using the `ConnectorV2.foreach_batch_item_change_in_place` API,
-        # allowing us to not worry about multi- or single-agent setups and returning
-        # the new version of each item we are iterating over.
-        self.foreach_batch_item_change_in_place(
-            batch=data,
-            column=Columns.OBS,
-            func=(
-                lambda item, eps_id, agent_id, module_id: (
-                    # Multi-agent AND skip this AgentID.
-                    item
-                    if self._agent_ids and agent_id not in self._agent_ids
-                    # Single-agent or flatten this AgentIDs observation.
-                    else flatten_inputs_to_1d_tensor(
-                        item,
+        for sa_episode in self.single_agent_episode_iterator(
+            episodes, agents_that_stepped_only=True
+        ):
+            # Episode is not finalized yet and thus still operates on lists of items.
+            assert not sa_episode.is_finalized
+
+            last_obs = sa_episode.get_observations(-1)
+
+            if self._multi_agent:
+                if (
+                    self._agent_ids is not None
+                    and sa_episode.agent_id not in self._agent_ids
+                ):
+                    flattened_obs = last_obs
+                else:
+                    flattened_obs = flatten_inputs_to_1d_tensor(
+                        inputs=last_obs,
                         # In the multi-agent case, we need to use the specific agent's
                         # space struct, not the multi-agent observation space dict.
-                        (
-                            self._input_obs_base_struct
-                            if not agent_id
-                            else self._input_obs_base_struct[agent_id]
-                        ),
-                        # Our items are bare observations (no batch axis present).
+                        spaces_struct=self._input_obs_base_struct[sa_episode.agent_id],
+                        # Our items are individual observations (no batch axis present).
                         batch_axis=False,
                     )
+            else:
+                flattened_obs = flatten_inputs_to_1d_tensor(
+                    inputs=last_obs,
+                    spaces_struct=self._input_obs_base_struct,
+                    # Our items are individual observations (no batch axis present).
+                    batch_axis=False,
+                )
-            ),
-        )

+            # Write new observation directly back into the episode.
+            sa_episode.set_observations(at_indices=-1, new_data=flattened_obs)
+            # We set the Episode's observation space to ours so that we can safely
+            # set the last obs to the new value (without causing a space mismatch
+            # error).
+            sa_episode.observation_space = self.observation_space

         return data
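The rewritten `__call__()` thus never touches the incoming `data` batch; it flattens each ongoing episode's latest observation in place. For context, such a piece is typically prepended to the env-to-module pipeline through the algorithm config, roughly as in the example scripts referenced in the BUILD targets above (a sketch; the exact config call is an assumption, not part of this diff):

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.connectors.env_to_module import FlattenObservations

config = (
    PPOConfig()
    .environment("CartPole-v1")
    # The pipeline assigns the piece's input/output spaces when it is built.
    .env_runners(env_to_module_connector=lambda env: FlattenObservations())
)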
13 changes: 5 additions & 8 deletions rllib/connectors/env_to_module/mean_std_filter.py
@@ -1,7 +1,7 @@
 from typing import Any, Dict, List, Optional
-from gymnasium.spaces import Discrete, MultiDiscrete

 import gymnasium as gym
+from gymnasium.spaces import Discrete, MultiDiscrete
 import numpy as np
 import tree

@@ -121,13 +121,10 @@ def __call__(
                 sa_obs, update=self._update_stats
             )
             sa_episode.set_observations(at_indices=-1, new_data=normalized_sa_obs)
-
-            if len(sa_episode) == 0:
-                # TODO (sven): This is kind of a hack.
-                # We set the Episode's observation space to ours so that we can safely
-                # set the last obs to the new value (without causing a space mismatch
-                # error).
-                sa_episode.observation_space = self.observation_space
+            # We set the Episode's observation space to ours so that we can safely
+            # set the last obs to the new value (without causing a space mismatch
+            # error).
+            sa_episode.observation_space = self.observation_space

         # Leave `data` as is. RLlib's default connector will automatically
         # populate the OBS column therein from the episodes' now transformed
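Unconditionally overwriting the episode's observation space is needed because a mean-std normalized observation generally falls outside the env's original space bounds, which would otherwise trip a space-mismatch check. A tiny standalone illustration with hypothetical running statistics:

import gymnasium as gym
import numpy as np

orig_space = gym.spaces.Box(0.0, 1.0, shape=(2,), dtype=np.float32)
obs = np.array([0.9, 0.1], dtype=np.float32)
# Normalize with a (made-up) running mean of 0.5 and stddev of 0.3.
normalized = (obs - 0.5) / 0.3
print(orig_space.contains(normalized))  # -> False: outside [0.0, 1.0]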