[RLlib] Add "official" benchmark script for Atari PPO benchmarks (new API stack). #45697
@@ -0,0 +1,123 @@
"""Script to execute RLlib's official PPO Atari benchmarks. | ||
|
||
How to run this script | ||
---------------------- | ||
`python [script-name].py --enable-new-api-stack --stop-timesteps 12000000 | ||
--num-gpus=4 --num-env-runners=95` | ||
|
||
In order to only run individual or lists of envs, you can provide a list of env-strings | ||
under the `--env` arg, such as `--env ALE/Pong-v5,ALE/Breakout-v5`. | ||
|
||
For logging to your WandB account, use: | ||
`--wandb-key=[your WandB API key] --wandb-project=[some project name] | ||
--wandb-run-name=[optional: WandB run name (within the defined project)]` | ||
|
||
|
||
Results to expect | ||
----------------- | ||
TODO (sven): Link to RLlib's to-be-created benchmark page. | ||
""" | ||
import subprocess | ||
|
||
from ray.rllib.utils.test_utils import add_rllib_example_script_args | ||
|
||
|
||
parser = add_rllib_example_script_args() | ||
|
||
# Might need `gymnasium[atari, other]` to be installed. | ||
|
||
# See the following links for benchmark results of other libraries: | ||
# Original paper: https://arxiv.org/abs/1812.05905 | ||
# CleanRL: https://wandb.ai/cleanrl/cleanrl.benchmark/reports/Mujoco--VmlldzoxODE0NjE | ||
# AgileRL: https://github.com/AgileRL/AgileRL?tab=readme-ov-file#benchmarks | ||
# [0] = reward to expect for DQN rainbow [1] = timesteps to run (always 200M for DQN | ||
# rainbow). | ||
# Note that for PPO, we simply run everything for 6M ts. | ||
benchmark_envs = {
    "ALE/Alien-v5": (6022.9, 200000000),
    "ALE/Amidar-v5": (202.8, 200000000),
    "ALE/Assault-v5": (14491.7, 200000000),
    "ALE/Asterix-v5": (280114.0, 200000000),
    "ALE/Asteroids-v5": (2249.4, 200000000),
    "ALE/Atlantis-v5": (814684.0, 200000000),
    "ALE/BankHeist-v5": (826.0, 200000000),
    "ALE/BattleZone-v5": (52040.0, 200000000),
    "ALE/BeamRider-v5": (21768.5, 200000000),
    "ALE/Berzerk-v5": (1793.4, 200000000),
    "ALE/Bowling-v5": (39.4, 200000000),
    "ALE/Boxing-v5": (54.9, 200000000),
    "ALE/Breakout-v5": (379.5, 200000000),
    "ALE/Centipede-v5": (7160.9, 200000000),
    "ALE/ChopperCommand-v5": (10916.0, 200000000),
    "ALE/CrazyClimber-v5": (143962.0, 200000000),
    "ALE/Defender-v5": (47671.3, 200000000),
    "ALE/DemonAttack-v5": (109670.7, 200000000),
    "ALE/DoubleDunk-v5": (-0.6, 200000000),
    "ALE/Enduro-v5": (2061.1, 200000000),
    "ALE/FishingDerby-v5": (22.6, 200000000),
    "ALE/Freeway-v5": (29.1, 200000000),
    "ALE/Frostbite-v5": (4141.1, 200000000),
    "ALE/Gopher-v5": (72595.7, 200000000),
    "ALE/Gravitar-v5": (567.5, 200000000),
    "ALE/Hero-v5": (50496.8, 200000000),
    "ALE/IceHockey-v5": (-11685.8, 200000000),
    "ALE/Kangaroo-v5": (10841.0, 200000000),
    "ALE/Krull-v5": (6715.5, 200000000),
    "ALE/KungFuMaster-v5": (28999.8, 200000000),
    "ALE/MontezumaRevenge-v5": (154.0, 200000000),
    "ALE/MsPacman-v5": (2570.2, 200000000),
    "ALE/NameThisGame-v5": (11686.5, 200000000),
    "ALE/Phoenix-v5": (103061.6, 200000000),
    "ALE/Pitfall-v5": (-37.6, 200000000),
    "ALE/Pong-v5": (19.0, 200000000),
    "ALE/PrivateEye-v5": (1704.4, 200000000),
    "ALE/Qbert-v5": (18397.6, 200000000),
    "ALE/RoadRunner-v5": (54261.0, 200000000),
    "ALE/Robotank-v5": (55.2, 200000000),
    "ALE/Seaquest-v5": (19176.0, 200000000),
    "ALE/Skiing-v5": (-11685.8, 200000000),
    "ALE/Solaris-v5": (2860.7, 200000000),
    "ALE/SpaceInvaders-v5": (12629.0, 200000000),
    "ALE/StarGunner-v5": (123853.0, 200000000),
    "ALE/Surround-v5": (7.0, 200000000),
    "ALE/Tennis-v5": (-2.2, 200000000),
    "ALE/TimePilot-v5": (11190.5, 200000000),
    "ALE/Tutankham-v5": (126.9, 200000000),
    "ALE/Venture-v5": (45.0, 200000000),
    "ALE/VideoPinball-v5": (506817.2, 200000000),
    "ALE/WizardOfWor-v5": (14631.5, 200000000),
    "ALE/YarsRevenge-v5": (93007.9, 200000000),
    "ALE/Zaxxon-v5": (19658.0, 200000000),
}


if __name__ == "__main__":
    args = parser.parse_args()

    # Compile the base command running the actual `tuned_example` script.
    base_commands = [
        "python",
        "../../tuned_examples/ppo/atari_ppo.py",
        "--enable-new-api-stack",
        f"--num-env-runners={args.num_env_runners}" if args.num_env_runners else "",
        f"--num-gpus={args.num_gpus}",
        f"--wandb-key={args.wandb_key}" if args.wandb_key else "",
        f"--wandb-project={args.wandb_project}" if args.wandb_project else "",
        f"--wandb-run-name={args.wandb_run_name}" if args.wandb_run_name else "",
        f"--stop-timesteps={args.stop_timesteps}",
        f"--checkpoint-freq={args.checkpoint_freq}",
        "--checkpoint-at-end" if args.checkpoint_at_end else "",
    ]

    # Loop through all envs (given on the command line or found in
    # `benchmark_envs`) and run the `tuned_example` script for each of them.
    for env_name in args.env.split(",") if args.env else benchmark_envs.keys():
        # Remove missing (empty-string) commands.
        commands = []
        for c in base_commands:
            if c != "":
                commands.append(c)
        commands.append(f"--env={env_name}")
        commands.append(f"--wandb-run-name={env_name}")
        print(f"Running {env_name} through command line=`{commands}`")
        subprocess.run(commands)
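
For illustration, the argv list the loop passes to subprocess.run for the first env would look roughly like the following sketch, assuming the docstring's example flags (--num-gpus=4 --num-env-runners=95 --stop-timesteps 12000000), no WandB flags, and a checkpoint frequency of 0 (the actual default comes from add_rllib_example_script_args and is not shown in this PR):

    [
        "python",
        "../../tuned_examples/ppo/atari_ppo.py",
        "--enable-new-api-stack",
        "--num-env-runners=95",
        "--num-gpus=4",
        "--stop-timesteps=12000000",
        "--checkpoint-freq=0",  # assumed parser default; not confirmed here
        "--env=ALE/Alien-v5",
        "--wandb-run-name=ALE/Alien-v5",
    ]

Note that the per-env `--wandb-run-name` is always appended last, so it takes effect even when a global `--wandb-run-name` was passed in `base_commands`.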
Review comment: It is somehow strange to me that we emulate running from the command line, which in turn runs a script that could have been triggered directly in the loop. It makes sense to me that users can run single envs, but why not trigger them directly in the loop?
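
A minimal sketch of that suggestion, assuming `atari_ppo.py` were refactored to expose a config-building helper (the `build_atari_ppo_config` factory below is hypothetical and does not exist in the PR):

    from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

    for env_name in args.env.split(",") if args.env else benchmark_envs.keys():
        args.env = env_name
        args.wandb_run_name = env_name
        # Hypothetical factory that would need to be extracted from atari_ppo.py:
        config = build_atari_ppo_config(args)
        run_rllib_example_script_experiment(config, args=args)

The trade-off is isolation: the subprocess approach gives each env a fresh Python process and Ray session, while the in-loop variant shares one process across all runs.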
atari_ppo.py

@@ -1,19 +1,18 @@
 import gymnasium as gym

+from ray import tune
 from ray.rllib.algorithms.ppo import PPOConfig
 from ray.rllib.connectors.env_to_module.frame_stacking import FrameStackingEnvToModule
 from ray.rllib.connectors.learner.frame_stacking import FrameStackingLearner
 from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack
-from ray.rllib.utils.metrics import (
-    ENV_RUNNER_RESULTS,
-    EPISODE_RETURN_MEAN,
-    NUM_ENV_STEPS_SAMPLED_LIFETIME,
-)
 from ray.rllib.utils.test_utils import add_rllib_example_script_args
-from ray import tune


-parser = add_rllib_example_script_args()
+parser = add_rllib_example_script_args(
+    default_reward=float("inf"),
+    default_timesteps=3000000,
+    default_iters=100000000000,
+)
 # Use `parser` to add your own custom command line options to this script
 # and (if needed) use their values to set up `config` below.
 args = parser.parse_args()
@@ -81,13 +80,8 @@ def _env_creator(cfg):
     )
 )

-stop = {
-    f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 20.0,
-    NUM_ENV_STEPS_SAMPLED_LIFETIME: 1500000,
-}
-

 if __name__ == "__main__":
     from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

-    run_rllib_example_script_experiment(config, args=args, stop=stop)
+    run_rllib_example_script_experiment(config, args=args)
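
Taken together with the new parser defaults in the first hunk, the removed hardcoded `stop` dict is replaced by args-derived criteria. Assuming `run_rllib_example_script_experiment` maps the parsed args onto Tune-style stop conditions (an assumption about the helper's internals, not shown in this diff), the new defaults correspond roughly to:

    from ray.rllib.utils.metrics import (
        ENV_RUNNER_RESULTS,
        EPISODE_RETURN_MEAN,
        NUM_ENV_STEPS_SAMPLED_LIFETIME,
    )

    # Rough equivalent of the stop conditions implied by the new defaults:
    stop = {
        # `default_reward=float("inf")` -> the return criterion never triggers.
        f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": float("inf"),
        # `default_timesteps=3000000` -> the effective stopping criterion.
        NUM_ENV_STEPS_SAMPLED_LIFETIME: 3_000_000,
        # `default_iters=100000000000` -> effectively unbounded.
        "training_iteration": 100_000_000_000,
    }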
Review comment: In atari_ppo.py, the timesteps are set to 3M.
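
If a per-env stopping criterion is still wanted despite the 3M-timesteps default, the `stop` argument visible in the removed line could presumably still be passed explicitly, e.g. restoring the old Pong criterion (a sketch based only on the removed code above):

    run_rllib_example_script_experiment(
        config,
        args=args,
        stop={
            f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 20.0,
            NUM_ENV_STEPS_SAMPLED_LIFETIME: 1_500_000,
        },
    )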