From e6e21ac2bba8b88c66c88b553a40b21a1c78f0a4 Mon Sep 17 00:00:00 2001
From: Sven Mika
Date: Mon, 3 Jun 2024 16:22:59 +0200
Subject: [PATCH] [RLlib] Enhance env-rendering callback. (#45682)

---
 .../envs/env_rendering_and_recording.py       | 58 +++++++++++--------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/rllib/examples/envs/env_rendering_and_recording.py b/rllib/examples/envs/env_rendering_and_recording.py
index a671087b42dd..d910ac92fc57 100644
--- a/rllib/examples/envs/env_rendering_and_recording.py
+++ b/rllib/examples/envs/env_rendering_and_recording.py
@@ -59,6 +59,7 @@
 """
 import gymnasium as gym
 import numpy as np
+from typing import Optional, Sequence
 
 from ray.rllib.algorithms.callbacks import DefaultCallbacks
 from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack
@@ -85,8 +86,10 @@ class EnvRenderCallback(DefaultCallbacks):
     and temporarily store it in the Episode object.
     """
 
-    def __init__(self):
+    def __init__(self, env_runner_indices: Optional[Sequence[int]] = None):
         super().__init__()
+        # Only render and record on certain EnvRunner indices?
+        self.env_runner_indices = env_runner_indices
         # Per sample round (on this EnvRunner), we want to only log the best- and
         # worst performing episode's videos in the custom metrics. Otherwise, too much
         # data would be sent to WandB.
@@ -108,6 +111,12 @@ def on_episode_step(
 
         Note that this would work with MultiAgentEpisodes as well.
         """
+        if (
+            self.env_runner_indices is not None
+            and env_runner.worker_index not in self.env_runner_indices
+        ):
+            return
+
         # If we have a vector env, only render the sub-env at index 0.
         if isinstance(env.unwrapped, gym.vector.VectorEnv):
             image = env.envs[0].render()
@@ -184,30 +193,31 @@ def on_sample_end(
     ) -> None:
         """Logs the best and worst video to this EnvRunner's MetricsLogger."""
         # Best video.
-        metrics_logger.log_value(
-            "episode_videos_best",
-            self.best_episode_and_return[0],
-            # Do not reduce the videos (across the various parallel EnvRunners). This
-            # would not make sense (mean over the pixels?). Instead, we want to log all
-            # best videos of all EnvRunners per iteration.
-            reduce=None,
-            # B/c we do NOT reduce over the video data (mean/min/max), we need to make
-            # sure the list of videos in our MetricsLogger does not grow infinitely and
-            # gets cleared after each `reduce()` operation, meaning every time, the
-            # EnvRunner is asked to send its logged metrics.
-            clear_on_reduce=True,
-        )
+        if self.best_episode_and_return[0] is not None:
+            metrics_logger.log_value(
+                "episode_videos_best",
+                self.best_episode_and_return[0],
+                # Do not reduce the videos (across the various parallel EnvRunners).
+                # This would not make sense (mean over the pixels?). Instead, we want to
+                # log all best videos of all EnvRunners per iteration.
+                reduce=None,
+                # B/c we do NOT reduce over the video data (mean/min/max), we need to
+                # make sure the list of videos in our MetricsLogger does not grow
+                # infinitely and gets cleared after each `reduce()` operation, meaning
+                # every time, the EnvRunner is asked to send its logged metrics.
+                clear_on_reduce=True,
+            )
+            self.best_episode_and_return = (None, float("-inf"))
 
         # Worst video.
-        metrics_logger.log_value(
-            "episode_videos_worst",
-            self.worst_episode_and_return[0],
-            # Same logging options as above.
-            reduce=None,
-            clear_on_reduce=True,
-        )
-        # Reset our best/worst placeholders.
-        self.best_episode_and_return = (None, float("-inf"))
-        self.worst_episode_and_return = (None, float("inf"))
+        if self.worst_episode_and_return[0] is not None:
+            metrics_logger.log_value(
+                "episode_videos_worst",
+                self.worst_episode_and_return[0],
+                # Same logging options as above.
+                reduce=None,
+                clear_on_reduce=True,
+            )
+            self.worst_episode_and_return = (None, float("inf"))
 
 
 if __name__ == "__main__":
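
Usage sketch (not part of the patch): because the new `env_runner_indices`
argument must be bound at construction time, a user script would hand the
callback class to `AlgorithmConfig.callbacks()` via a `functools.partial`,
which works since the config only needs a callable that produces a
`DefaultCallbacks` instance. The PPO/Pong setup and index choice below are
purely illustrative assumptions, not something this patch prescribes:

    import functools

    from ray.rllib.algorithms.ppo import PPOConfig
    from ray.rllib.examples.envs.env_rendering_and_recording import (
        EnvRenderCallback,
    )

    config = (
        PPOConfig()
        # `render_mode="rgb_array"` so that `env.render()` returns frames
        # (env_config kwargs are forwarded to the registered gym env).
        .environment("ALE/Pong-v5", env_config={"render_mode": "rgb_array"})
        .env_runners(num_env_runners=2)
        # Bind the new constructor kwarg: only render/record on EnvRunner
        # index 1 (`worker_index` 0 is the local EnvRunner).
        .callbacks(functools.partial(EnvRenderCallback, env_runner_indices=[1]))
    )
    algo = config.build()
    print(algo.train())

With this binding, the early-return added to `on_episode_step` makes every
other EnvRunner skip the (relatively expensive) `env.render()` call entirely,
rather than rendering frames that would never be logged.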