From 1164b24ef9ef0f5d19cf600e3e2f5aa7d9c1a857 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Thu, 3 Nov 2022 09:57:28 +0100 Subject: [PATCH 1/5] [rllib] stopper support for python based training Signed-off-by: Max Pumperla --- rllib/common.py | 5 ++++- rllib/scripts.py | 2 ++ rllib/tests/test_rllib_train_and_evaluate.py | 5 +++-- rllib/train.py | 18 +++++++++--------- rllib/tuned_examples/a2c/cartpole_a2c.py | 4 +++- rllib/tuned_examples/a3c/__init__.py | 0 rllib/tuned_examples/a3c/cartpole_a3c.py | 13 +++++++++++++ .../simple_q/cartpole_simpleq_test.py | 4 +++- 8 files changed, 37 insertions(+), 14 deletions(-) create mode 100644 rllib/tuned_examples/a3c/__init__.py create mode 100644 rllib/tuned_examples/a3c/cartpole_a3c.py diff --git a/rllib/common.py b/rllib/common.py index 2d90aa8820f2..a998fe5a9989 100644 --- a/rllib/common.py +++ b/rllib/common.py @@ -253,6 +253,7 @@ class CLIArguments: "cartpole-a2c": { "file": "tuned_examples/a2c/cartpole_a2c.py", "file_type": SupportedFileType.python, + "stop": "{'timesteps_total': 50000, 'episode_reward_mean': 200}", "description": "Runs A2C on the CartPole-v1 environment.", }, "cartpole-a2c-micro": { @@ -261,7 +262,9 @@ class CLIArguments: }, # A3C "cartpole-a3c": { - "file": "tuned_examples/a3c/cartpole-a3c.yaml", + "file": "tuned_examples/a3c/cartpole_a3c.py", + "file_type": SupportedFileType.python, + "stop": "{'timesteps_total': 20000, 'episode_reward_mean': 150}", "description": "Runs A3C on the CartPole-v1 environment.", }, "pong-a3c": { diff --git a/rllib/scripts.py b/rllib/scripts.py index c7934906f01f..6bc23f3d8d9a 100644 --- a/rllib/scripts.py +++ b/rllib/scripts.py @@ -102,10 +102,12 @@ def run(example_id: str = typer.Argument(..., help="Example ID to run.")): example_file = get_example_file(example_id) example_file, temp_file = download_example_file(example_file) file_type = example.get("file_type", SupportedFileType.yaml) + stop = example.get("stop", "{}") train_module.file( config_file=example_file, file_type=file_type, + stop=stop, checkpoint_freq=1, checkpoint_at_end=True, keep_checkpoints_num=None, diff --git a/rllib/tests/test_rllib_train_and_evaluate.py b/rllib/tests/test_rllib_train_and_evaluate.py index 6d69a256a3d3..52063d0c51e2 100644 --- a/rllib/tests/test_rllib_train_and_evaluate.py +++ b/rllib/tests/test_rllib_train_and_evaluate.py @@ -298,11 +298,12 @@ def test_json_run(self): def test_python_run(self): assert os.popen( f"python {rllib_dir}/scripts.py train file tuned_examples/simple_q/" - f"cartpole_simpleq_test.py --type=python" + f"cartpole_simpleq_test.py --type=python " + f"--stop={'timesteps_total': 50000, 'episode_reward_mean': 200}" ).read() def test_all_example_files_exist(self): - """ "The 'example' command now knows about example files, + """The 'example' command now knows about example files, so we check that they exist.""" from ray.rllib.common import EXAMPLES diff --git a/rllib/train.py b/rllib/train.py index b7971464a070..8a9c14ae586e 100755 --- a/rllib/train.py +++ b/rllib/train.py @@ -12,7 +12,7 @@ from ray.tune.schedulers import create_scheduler from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.common import CLIArguments as cli -from ray.rllib.common import FrameworkEnum, SupportedFileType +from ray.rllib.common import FrameworkEnum, SupportedFileType, download_example_file def import_backends(): @@ -51,7 +51,7 @@ def _patch_path(path: str): def load_experiments_from_file( - config_file: str, file_type: SupportedFileType, checkpoint_config: dict + config_file: str, file_type: SupportedFileType, checkpoint_config: dict, stop: str ) -> dict: """Load experiments from a file. Supports YAML, JSON and Python files. If you want to use a Python file, it has to have a 'config' variable @@ -88,10 +88,8 @@ def load_experiments_from_file( } } - # If there's a "stop" dict, add it to the experiment. - if hasattr(module, "stop"): - stop = getattr(module, "stop") - experiments["default"]["stop"] = stop + # Add a stopping condition if provided + experiments["default"]["stop"] = json.loads(stop) for key, val in experiments.items(): experiments[key]["checkpoint_config"] = checkpoint_config @@ -104,6 +102,8 @@ def file( # File-based arguments. config_file: str = cli.ConfigFile, file_type: SupportedFileType = cli.FileType, + # stopping conditions + stop: str = cli.Stop, # Checkpointing checkpoint_freq: int = cli.CheckpointFreq, checkpoint_at_end: bool = cli.CheckpointAtEnd, @@ -137,8 +137,6 @@ def file( rllib train file https://raw.githubusercontent.com/ray-project/ray/\ master/rllib/tuned_examples/ppo/cartpole-ppo.yaml """ - from ray.rllib.common import download_example_file - # Attempt to download the file if it's not found locally. config_file, temp_file = download_example_file( example_file=config_file, base_url=None @@ -154,7 +152,9 @@ def file( "checkpoint_score_attribute": checkpoint_score_attr, } - experiments = load_experiments_from_file(config_file, file_type, checkpoint_config) + experiments = load_experiments_from_file( + config_file, file_type, checkpoint_config, stop + ) exp_name = list(experiments.keys())[0] algo = experiments[exp_name]["run"] diff --git a/rllib/tuned_examples/a2c/cartpole_a2c.py b/rllib/tuned_examples/a2c/cartpole_a2c.py index f456297abd74..743bec365e4c 100644 --- a/rllib/tuned_examples/a2c/cartpole_a2c.py +++ b/rllib/tuned_examples/a2c/cartpole_a2c.py @@ -1,3 +1,6 @@ +# Run with: +# rllib train -f cartpole_a2c.py --type python \ +# --stop={'timesteps_total': 50000, 'episode_reward_mean': 200}" from ray.rllib.algorithms.a2c import A2CConfig @@ -8,4 +11,3 @@ .framework("tf") .rollouts(num_rollout_workers=0) ) -stop = {"episode_reward_mean": 150, "timesteps_total": 500000} diff --git a/rllib/tuned_examples/a3c/__init__.py b/rllib/tuned_examples/a3c/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/rllib/tuned_examples/a3c/cartpole_a3c.py b/rllib/tuned_examples/a3c/cartpole_a3c.py new file mode 100644 index 000000000000..0d8dd516c30a --- /dev/null +++ b/rllib/tuned_examples/a3c/cartpole_a3c.py @@ -0,0 +1,13 @@ +# Run with: +# rllib train -f cartpole_a3c.py --type python \ +# --stop={'timesteps_total': 20000, 'episode_reward_mean': 150}" +from ray.rllib.algorithms.a3c import A3CConfig + + +config = ( + A3CConfig() + .training(gamma=0.95) + .environment("CartPole-v1") + .framework("tf") + .rollouts(num_rollout_workers=0) +) diff --git a/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py b/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py index 885892d5aef2..db24e2612c13 100644 --- a/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py +++ b/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py @@ -1,3 +1,6 @@ +# Run with: +# rllib train -f cartpole_simpleq_test.py --type python \ +# --stop={'timesteps_total': 50000, 'episode_reward_mean': 200}" from ray.rllib.algorithms.simple_q import SimpleQConfig @@ -7,4 +10,3 @@ .framework("tf") .rollouts(num_rollout_workers=0) ) -stop = {"episode_reward_mean": 150, "timesteps_total": 50000} From 3b901c01d30aca343334e763b9db7ea685e6e033 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Fri, 4 Nov 2022 16:05:43 +0100 Subject: [PATCH 2/5] rm json, rm explicit file type, clean Signed-off-by: Max Pumperla --- rllib/common.py | 19 +++++++++++++++---- rllib/scripts.py | 3 +-- rllib/tests/test_rllib_train_and_evaluate.py | 8 +------- rllib/train.py | 11 +++++------ rllib/tuned_examples/a2c/cartpole-a2c.json | 15 --------------- rllib/tuned_examples/a2c/cartpole_a2c.py | 2 +- rllib/tuned_examples/a3c/cartpole_a3c.py | 2 +- .../simple_q/cartpole_simpleq_test.py | 2 +- 8 files changed, 25 insertions(+), 37 deletions(-) delete mode 100644 rllib/tuned_examples/a2c/cartpole-a2c.json diff --git a/rllib/common.py b/rllib/common.py index a998fe5a9989..0c3fc96020e4 100644 --- a/rllib/common.py +++ b/rllib/common.py @@ -24,10 +24,23 @@ class SupportedFileType(str, Enum): """Supported file types for RLlib, used for CLI argument validation.""" yaml = "yaml" - json = "json" python = "python" +def get_file_type(config_file: str) -> SupportedFileType: + if ".py" in config_file: + file_type = SupportedFileType.python + elif ".yaml" in config_file or ".yml" in config_file: + file_type = SupportedFileType.yaml + else: + raise ValueError( + "Unknown file type for config " + "file: {}. Supported extensions: .py, " + "yml, yaml.".format(config_file) + ) + return file_type + + def _create_tune_parser_help(): """Create a Tune dummy parser to access its 'help' docstrings.""" parser = _make_parser( @@ -105,7 +118,7 @@ def get_help(key: str) -> str: "`ray.rllib.examples.env.simple_corridor.SimpleCorridor`).", config_file="Use the algorithm configuration from this file.", filetype="The file type of the config file. Defaults to 'yaml' and can also be " - "'json', or 'python'.", + "'python'.", experiment_name="Name of the subdirectory under `local_dir` to put results in.", framework="The identifier of the deep learning framework you want to use." "Choose between TensorFlow 1.x ('tf'), TensorFlow 2.x ('tf2'), " @@ -252,7 +265,6 @@ class CLIArguments: }, "cartpole-a2c": { "file": "tuned_examples/a2c/cartpole_a2c.py", - "file_type": SupportedFileType.python, "stop": "{'timesteps_total': 50000, 'episode_reward_mean': 200}", "description": "Runs A2C on the CartPole-v1 environment.", }, @@ -263,7 +275,6 @@ class CLIArguments: # A3C "cartpole-a3c": { "file": "tuned_examples/a3c/cartpole_a3c.py", - "file_type": SupportedFileType.python, "stop": "{'timesteps_total': 20000, 'episode_reward_mean': 150}", "description": "Runs A3C on the CartPole-v1 environment.", }, diff --git a/rllib/scripts.py b/rllib/scripts.py index 6bc23f3d8d9a..777c5e1f7540 100644 --- a/rllib/scripts.py +++ b/rllib/scripts.py @@ -13,6 +13,7 @@ SupportedFileType, example_help, download_example_file, + get_file_type, ) # Main Typer CLI app @@ -101,12 +102,10 @@ def run(example_id: str = typer.Argument(..., help="Example ID to run.")): example = EXAMPLES[example_id] example_file = get_example_file(example_id) example_file, temp_file = download_example_file(example_file) - file_type = example.get("file_type", SupportedFileType.yaml) stop = example.get("stop", "{}") train_module.file( config_file=example_file, - file_type=file_type, stop=stop, checkpoint_freq=1, checkpoint_at_end=True, diff --git a/rllib/tests/test_rllib_train_and_evaluate.py b/rllib/tests/test_rllib_train_and_evaluate.py index 52063d0c51e2..a7bf73b74acf 100644 --- a/rllib/tests/test_rllib_train_and_evaluate.py +++ b/rllib/tests/test_rllib_train_and_evaluate.py @@ -289,16 +289,10 @@ def test_yaml_run(self): f"cartpole-simpleq-test.yaml" ).read() - def test_json_run(self): - assert os.popen( - f"python {rllib_dir}/scripts.py train file tuned_examples/simple_q/" - f"cartpole-simpleq-test.json --type=json" - ).read() - def test_python_run(self): assert os.popen( f"python {rllib_dir}/scripts.py train file tuned_examples/simple_q/" - f"cartpole_simpleq_test.py --type=python " + f"cartpole_simpleq_test.py " f"--stop={'timesteps_total': 50000, 'episode_reward_mean': 200}" ).read() diff --git a/rllib/train.py b/rllib/train.py index 8a9c14ae586e..67d4ded2a25a 100755 --- a/rllib/train.py +++ b/rllib/train.py @@ -12,7 +12,8 @@ from ray.tune.schedulers import create_scheduler from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.common import CLIArguments as cli -from ray.rllib.common import FrameworkEnum, SupportedFileType, download_example_file +from ray.rllib.common import FrameworkEnum, SupportedFileType +from ray.rllib.common import download_example_file, get_file_type def import_backends(): @@ -53,16 +54,13 @@ def _patch_path(path: str): def load_experiments_from_file( config_file: str, file_type: SupportedFileType, checkpoint_config: dict, stop: str ) -> dict: - """Load experiments from a file. Supports YAML, JSON and Python files. + """Load experiments from a file. Supports YAML and Python files. If you want to use a Python file, it has to have a 'config' variable that is an AlgorithmConfig object.""" if file_type == SupportedFileType.yaml: with open(config_file) as f: experiments = yaml.safe_load(f) - elif file_type == SupportedFileType.json: - with open(config_file) as f: - experiments = json.load(f) else: # Python file case (ensured by file type enum) import importlib @@ -101,7 +99,6 @@ def load_experiments_from_file( def file( # File-based arguments. config_file: str = cli.ConfigFile, - file_type: SupportedFileType = cli.FileType, # stopping conditions stop: str = cli.Stop, # Checkpointing @@ -152,6 +149,8 @@ def file( "checkpoint_score_attribute": checkpoint_score_attr, } + file_type = get_file_type(config_file) + experiments = load_experiments_from_file( config_file, file_type, checkpoint_config, stop ) diff --git a/rllib/tuned_examples/a2c/cartpole-a2c.json b/rllib/tuned_examples/a2c/cartpole-a2c.json deleted file mode 100644 index bfe5781f92ee..000000000000 --- a/rllib/tuned_examples/a2c/cartpole-a2c.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "cartpole-a2c":{ - "env":"CartPole-v0", - "run":"A2C", - "stop":{ - "episode_reward_mean":150, - "timesteps_total":500000 - }, - "config":{ - "framework":"tf", - "num_workers":0, - "lr":0.001 - } - } -} \ No newline at end of file diff --git a/rllib/tuned_examples/a2c/cartpole_a2c.py b/rllib/tuned_examples/a2c/cartpole_a2c.py index 743bec365e4c..e48d8c466e09 100644 --- a/rllib/tuned_examples/a2c/cartpole_a2c.py +++ b/rllib/tuned_examples/a2c/cartpole_a2c.py @@ -1,5 +1,5 @@ # Run with: -# rllib train -f cartpole_a2c.py --type python \ +# rllib train file cartpole_a2c.py \ # --stop={'timesteps_total': 50000, 'episode_reward_mean': 200}" from ray.rllib.algorithms.a2c import A2CConfig diff --git a/rllib/tuned_examples/a3c/cartpole_a3c.py b/rllib/tuned_examples/a3c/cartpole_a3c.py index 0d8dd516c30a..464f1ebcb9f8 100644 --- a/rllib/tuned_examples/a3c/cartpole_a3c.py +++ b/rllib/tuned_examples/a3c/cartpole_a3c.py @@ -1,5 +1,5 @@ # Run with: -# rllib train -f cartpole_a3c.py --type python \ +# rllib train file cartpole_a3c.py \ # --stop={'timesteps_total': 20000, 'episode_reward_mean': 150}" from ray.rllib.algorithms.a3c import A3CConfig diff --git a/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py b/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py index db24e2612c13..ba2816968870 100644 --- a/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py +++ b/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py @@ -1,5 +1,5 @@ # Run with: -# rllib train -f cartpole_simpleq_test.py --type python \ +# rllib train -f cartpole_simpleq_test.py\ # --stop={'timesteps_total': 50000, 'episode_reward_mean': 200}" from ray.rllib.algorithms.simple_q import SimpleQConfig From 1c870b172b8ca4cbda8b07d415026867e49b5c26 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Fri, 4 Nov 2022 16:39:14 +0100 Subject: [PATCH 3/5] lint Signed-off-by: Max Pumperla --- rllib/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rllib/train.py b/rllib/train.py index 0d23a45ddb6c..6a1dad12aa43 100644 --- a/rllib/train.py +++ b/rllib/train.py @@ -54,8 +54,8 @@ def _patch_path(path: str): def load_experiments_from_file( config_file: str, file_type: SupportedFileType, + stop: str, checkpoint_config: Optional[dict] = None, - stop: str ) -> dict: """Load experiments from a file. Supports YAML and Python files. If you want to use a Python file, it has to have a 'config' variable @@ -155,7 +155,7 @@ def file( file_type = get_file_type(config_file) experiments = load_experiments_from_file( - config_file, file_type, checkpoint_config, stop + config_file, file_type, stop, checkpoint_config ) exp_name = list(experiments.keys())[0] algo = experiments[exp_name]["run"] From e209664832b2ef5e1b604a267305a1f3ed4573a9 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Fri, 4 Nov 2022 16:56:20 +0100 Subject: [PATCH 4/5] make stop optional Signed-off-by: Max Pumperla --- rllib/train.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rllib/train.py b/rllib/train.py index 6a1dad12aa43..bb36a6ee1cfd 100644 --- a/rllib/train.py +++ b/rllib/train.py @@ -54,7 +54,7 @@ def _patch_path(path: str): def load_experiments_from_file( config_file: str, file_type: SupportedFileType, - stop: str, + stop: Optional[str] = None, checkpoint_config: Optional[dict] = None, ) -> dict: """Load experiments from a file. Supports YAML and Python files. @@ -90,7 +90,8 @@ def load_experiments_from_file( } # Add a stopping condition if provided - experiments["default"]["stop"] = json.loads(stop) + if stop: + experiments["default"]["stop"] = json.loads(stop) for key, val in experiments.items(): experiments[key]["checkpoint_config"] = checkpoint_config or {} From 79d5b2938539745dd41142fbd63ed844254b1c45 Mon Sep 17 00:00:00 2001 From: Max Pumperla Date: Fri, 4 Nov 2022 17:40:17 +0100 Subject: [PATCH 5/5] rm unused imports Signed-off-by: Max Pumperla --- rllib/scripts.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/rllib/scripts.py b/rllib/scripts.py index 777c5e1f7540..b20b97c9d146 100644 --- a/rllib/scripts.py +++ b/rllib/scripts.py @@ -10,10 +10,8 @@ from ray.rllib.common import ( EXAMPLES, FrameworkEnum, - SupportedFileType, example_help, download_example_file, - get_file_type, ) # Main Typer CLI app