From c748df8139bd5756eb1bac6d373997c137f336f9 Mon Sep 17 00:00:00 2001 From: Simon Zehnder Date: Fri, 10 May 2024 12:16:30 +0200 Subject: [PATCH 1/4] Changed comment. Signed-off-by: Simon Zehnder --- rllib/utils/replay_buffers/prioritized_episode_replay_buffer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rllib/utils/replay_buffers/prioritized_episode_replay_buffer.py b/rllib/utils/replay_buffers/prioritized_episode_replay_buffer.py index f7e818a659cf..38690232351e 100644 --- a/rllib/utils/replay_buffers/prioritized_episode_replay_buffer.py +++ b/rllib/utils/replay_buffers/prioritized_episode_replay_buffer.py @@ -288,7 +288,7 @@ def add( for i in range(len(eps)) ] ) - # Increase index. + # Increase index to the new length of `self._indices`. j = len(self._indices) @override(EpisodeReplayBuffer) From b95848c75a550e58968abb804e14afbe78a73668 Mon Sep 17 00:00:00 2001 From: Simon Zehnder Date: Fri, 31 May 2024 10:51:39 +0200 Subject: [PATCH 2/4] Fixed a minor bug that was calling the callback 'on_episode_created' after the 'env.reset' instead of before. The docstring in the callback clearly states that it should come before the reset. Signed-off-by: Simon Zehnder --- rllib/env/single_agent_env_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index 5f708fa600a7..32368b94d6a0 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -437,15 +437,15 @@ def _sample_episodes( done_episodes_to_return: List[SingleAgentEpisode] = [] - # Reset the environment. - # TODO (simon): Check, if we need here the seed from the config. - obs, infos = self.env.reset() episodes = [] for env_index in range(self.num_envs): episodes.append(self._new_episode()) self._make_on_episode_callback("on_episode_created", env_index, episodes) _shared_data = {} + # Reset the environment. 
+ # TODO (simon): Check, if we need here the seed from the config. + obs, infos = self.env.reset() for env_index in range(self.num_envs): episodes[env_index].add_env_reset( observation=obs[env_index], From ca98704fdc1478228f60418d11facdfe69f7600d Mon Sep 17 00:00:00 2001 From: Simon Zehnder Date: Fri, 31 May 2024 10:59:13 +0200 Subject: [PATCH 3/4] Modified callback order in 'MultiAgentEnvRunner'. Signed-off-by: Simon Zehnder --- rllib/env/multi_agent_env_runner.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py index f374f45b35e6..1300c8fd34b9 100644 --- a/rllib/env/multi_agent_env_runner.py +++ b/rllib/env/multi_agent_env_runner.py @@ -414,10 +414,6 @@ def _sample_episodes( done_episodes_to_return: List[MultiAgentEpisode] = [] - # Reset the environment. - # TODO (simon): Check, if we need here the seed from the config. - obs, infos = self.env.reset() - # Create a new multi-agent episode. _episode = self._new_episode() self._make_on_episode_callback("on_episode_created", _episode) @@ -425,6 +421,9 @@ def _sample_episodes( "agent_to_module_mapping_fn": self.config.policy_mapping_fn, } + # Reset the environment. + # TODO (simon): Check, if we need here the seed from the config. + obs, infos = self.env.reset() # Set initial obs and infos in the episodes. _episode.add_env_reset(observations=obs, infos=infos) self._make_on_episode_callback("on_episode_start", _episode) From d8eee30d5511d933e37ee0d73300e93af0a97ab4 Mon Sep 17 00:00:00 2001 From: Simon Zehnder Date: Fri, 31 May 2024 13:05:50 +0200 Subject: [PATCH 4/4] Implemented @sven1977's review. 
Signed-off-by: Simon Zehnder --- rllib/algorithms/callbacks.py | 11 +++++++---- rllib/env/single_agent_env_runner.py | 8 +++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/rllib/algorithms/callbacks.py b/rllib/algorithms/callbacks.py index e0d595be3c17..db404ed67afe 100644 --- a/rllib/algorithms/callbacks.py +++ b/rllib/algorithms/callbacks.py @@ -245,19 +245,22 @@ def on_episode_created( """Callback run when a new episode is created (but has not started yet!). This method gets called after a new Episode(V2) (old stack) or - SingleAgentEpisode/MultiAgentEpisode instance has been created. + MultiAgentEpisode instance has been created. This happens before the respective sub-environment's (usually a gym.Env) `reset()` is called by RLlib. - 1) Episode(V2)/Single-/MultiAgentEpisode created: This callback is called. + Note, at the moment this callback does not get called in the new API stack + and single-agent mode. + + 1) Episode(V2)/MultiAgentEpisode created: This callback is called. 2) Respective sub-environment (gym.Env) is `reset()`. 3) Callback `on_episode_start` is called. 4) Stepping through sub-environment/episode commences. Args: episode: The newly created episode. On the new API stack, this will be a - SingleAgentEpisode or MultiAgentEpisode object. On the old API stack, - this will be a Episode or EpisodeV2 object. + MultiAgentEpisode object. On the old API stack, this will be a + Episode or EpisodeV2 object. This is the episode that is about to be started with an upcoming `env.reset()`. Only after this reset call, the `on_episode_start` callback will be called. diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index 32368b94d6a0..3025689255f2 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -224,11 +224,12 @@ def _sample_timesteps( # Have to reset the env (on all vector sub_envs). 
if force_reset or self._needs_initial_reset: - # Create n new episodes and make the `on_episode_created` callbacks. + # Create n new episodes. + # TODO (sven): Add callback `on_episode_created` as soon as + # `gymnasium-v1.0.0a2` PR is coming. self._episodes = [] for env_index in range(self.num_envs): self._episodes.append(self._new_episode()) - self._make_on_episode_callback("on_episode_created", env_index) self._shared_data = {} # Erase all cached ongoing episodes (these will never be completed and @@ -440,7 +441,8 @@ def _sample_episodes( episodes = [] for env_index in range(self.num_envs): episodes.append(self._new_episode()) - self._make_on_episode_callback("on_episode_created", env_index, episodes) + # TODO (sven): Add callback `on_episode_created` as soon as + # `gymnasium-v1.0.0a2` PR is coming. _shared_data = {} # Reset the environment.