From 1164b24ef9ef0f5d19cf600e3e2f5aa7d9c1a857 Mon Sep 17 00:00:00 2001
From: Max Pumperla <max.pumperla@googlemail.com>
Date: Thu, 3 Nov 2022 09:57:28 +0100
Subject: [PATCH 1/5] [rllib] stopper support for python based training

Signed-off-by: Max Pumperla <max.pumperla@googlemail.com>
---
 rllib/common.py                                |  5 ++++-
 rllib/scripts.py                               |  2 ++
 rllib/tests/test_rllib_train_and_evaluate.py   |  5 +++--
 rllib/train.py                                 | 18 +++++++++---------
 rllib/tuned_examples/a2c/cartpole_a2c.py       |  4 +++-
 rllib/tuned_examples/a3c/__init__.py           |  0
 rllib/tuned_examples/a3c/cartpole_a3c.py       | 13 +++++++++++++
 .../simple_q/cartpole_simpleq_test.py          |  4 +++-
 8 files changed, 37 insertions(+), 14 deletions(-)
 create mode 100644 rllib/tuned_examples/a3c/__init__.py
 create mode 100644 rllib/tuned_examples/a3c/cartpole_a3c.py

diff --git a/rllib/common.py b/rllib/common.py
index 2d90aa8820f2..a998fe5a9989 100644
--- a/rllib/common.py
+++ b/rllib/common.py
@@ -253,6 +253,7 @@ class CLIArguments:
     "cartpole-a2c": {
         "file": "tuned_examples/a2c/cartpole_a2c.py",
         "file_type": SupportedFileType.python,
+        "stop": "{'timesteps_total': 50000, 'episode_reward_mean': 200}",
         "description": "Runs A2C on the CartPole-v1 environment.",
     },
     "cartpole-a2c-micro": {
@@ -261,7 +262,9 @@ class CLIArguments:
     },
     # A3C
     "cartpole-a3c": {
-        "file": "tuned_examples/a3c/cartpole-a3c.yaml",
+        "file": "tuned_examples/a3c/cartpole_a3c.py",
+        "file_type": SupportedFileType.python,
+        "stop": "{'timesteps_total': 20000, 'episode_reward_mean': 150}",
         "description": "Runs A3C on the CartPole-v1 environment.",
     },
     "pong-a3c": {
diff --git a/rllib/scripts.py b/rllib/scripts.py
index c7934906f01f..6bc23f3d8d9a 100644
--- a/rllib/scripts.py
+++ b/rllib/scripts.py
@@ -102,10 +102,12 @@ def run(example_id: str = typer.Argument(..., help="Example ID to run.")):
     example_file = get_example_file(example_id)
     example_file, temp_file = download_example_file(example_file)
     file_type = example.get("file_type", SupportedFileType.yaml)
+    stop = example.get("stop", "{}")
 
     train_module.file(
         config_file=example_file,
         file_type=file_type,
+        stop=stop,
         checkpoint_freq=1,
         checkpoint_at_end=True,
         keep_checkpoints_num=None,
diff --git a/rllib/tests/test_rllib_train_and_evaluate.py b/rllib/tests/test_rllib_train_and_evaluate.py
index 6d69a256a3d3..52063d0c51e2 100644
--- a/rllib/tests/test_rllib_train_and_evaluate.py
+++ b/rllib/tests/test_rllib_train_and_evaluate.py
@@ -298,11 +298,12 @@ def test_json_run(self):
     def test_python_run(self):
         assert os.popen(
             f"python {rllib_dir}/scripts.py train file tuned_examples/simple_q/"
-            f"cartpole_simpleq_test.py --type=python"
+            f"cartpole_simpleq_test.py --type=python "
+            f"--stop={'timesteps_total': 50000, 'episode_reward_mean': 200}"
         ).read()
 
     def test_all_example_files_exist(self):
-        """ "The 'example' command now knows about example files,
+        """The 'example' command now knows about example files,
         so we check that they exist."""
         from ray.rllib.common import EXAMPLES
 
diff --git a/rllib/train.py b/rllib/train.py
index b7971464a070..8a9c14ae586e 100755
--- a/rllib/train.py
+++ b/rllib/train.py
@@ -12,7 +12,7 @@
 from ray.tune.schedulers import create_scheduler
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.common import CLIArguments as cli
-from ray.rllib.common import FrameworkEnum, SupportedFileType
+from ray.rllib.common import FrameworkEnum, SupportedFileType, download_example_file
 
 
 def import_backends():
@@ -51,7 +51,7 @@ def _patch_path(path: str):
 
 
 def load_experiments_from_file(
-    config_file: str, file_type: SupportedFileType, checkpoint_config: dict
+    config_file: str, file_type: SupportedFileType, checkpoint_config: dict, stop: str
 ) -> dict:
     """Load experiments from a file. Supports YAML, JSON and Python files.
     If you want to use a Python file, it has to have a 'config' variable
@@ -88,10 +88,8 @@ def load_experiments_from_file(
             }
         }
 
-        # If there's a "stop" dict, add it to the experiment.
-        if hasattr(module, "stop"):
-            stop = getattr(module, "stop")
-            experiments["default"]["stop"] = stop
+        # Add a stopping condition if provided
+        experiments["default"]["stop"] = json.loads(stop)
 
     for key, val in experiments.items():
         experiments[key]["checkpoint_config"] = checkpoint_config
@@ -104,6 +102,8 @@ def file(
     # File-based arguments.
     config_file: str = cli.ConfigFile,
     file_type: SupportedFileType = cli.FileType,
+    # stopping conditions
+    stop: str = cli.Stop,
     # Checkpointing
     checkpoint_freq: int = cli.CheckpointFreq,
     checkpoint_at_end: bool = cli.CheckpointAtEnd,
@@ -137,8 +137,6 @@ def file(
       rllib train file https://raw.githubusercontent.com/ray-project/ray/\
       master/rllib/tuned_examples/ppo/cartpole-ppo.yaml
     """
-    from ray.rllib.common import download_example_file
-
     # Attempt to download the file if it's not found locally.
     config_file, temp_file = download_example_file(
         example_file=config_file, base_url=None
@@ -154,7 +152,9 @@ def file(
         "checkpoint_score_attribute": checkpoint_score_attr,
     }
 
-    experiments = load_experiments_from_file(config_file, file_type, checkpoint_config)
+    experiments = load_experiments_from_file(
+        config_file, file_type, checkpoint_config, stop
+    )
     exp_name = list(experiments.keys())[0]
     algo = experiments[exp_name]["run"]
 
diff --git a/rllib/tuned_examples/a2c/cartpole_a2c.py b/rllib/tuned_examples/a2c/cartpole_a2c.py
index f456297abd74..743bec365e4c 100644
--- a/rllib/tuned_examples/a2c/cartpole_a2c.py
+++ b/rllib/tuned_examples/a2c/cartpole_a2c.py
@@ -1,3 +1,6 @@
+# Run with:
+# rllib train -f cartpole_a2c.py --type python \
+#     --stop={'timesteps_total': 50000, 'episode_reward_mean': 200}"
 from ray.rllib.algorithms.a2c import A2CConfig
 
 
@@ -8,4 +11,3 @@
     .framework("tf")
     .rollouts(num_rollout_workers=0)
 )
-stop = {"episode_reward_mean": 150, "timesteps_total": 500000}
diff --git a/rllib/tuned_examples/a3c/__init__.py b/rllib/tuned_examples/a3c/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/rllib/tuned_examples/a3c/cartpole_a3c.py b/rllib/tuned_examples/a3c/cartpole_a3c.py
new file mode 100644
index 000000000000..0d8dd516c30a
--- /dev/null
+++ b/rllib/tuned_examples/a3c/cartpole_a3c.py
@@ -0,0 +1,13 @@
+# Run with:
+# rllib train -f cartpole_a3c.py --type python \
+#     --stop={'timesteps_total': 20000, 'episode_reward_mean': 150}"
+from ray.rllib.algorithms.a3c import A3CConfig
+
+
+config = (
+    A3CConfig()
+    .training(gamma=0.95)
+    .environment("CartPole-v1")
+    .framework("tf")
+    .rollouts(num_rollout_workers=0)
+)
diff --git a/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py b/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py
index 885892d5aef2..db24e2612c13 100644
--- a/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py
+++ b/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py
@@ -1,3 +1,6 @@
+# Run with:
+# rllib train -f cartpole_simpleq_test.py --type python \
+#     --stop={'timesteps_total': 50000, 'episode_reward_mean': 200}"
 from ray.rllib.algorithms.simple_q import SimpleQConfig
 
 
@@ -7,4 +10,3 @@
     .framework("tf")
     .rollouts(num_rollout_workers=0)
 )
-stop = {"episode_reward_mean": 150, "timesteps_total": 50000}

From 3b901c01d30aca343334e763b9db7ea685e6e033 Mon Sep 17 00:00:00 2001
From: Max Pumperla <max.pumperla@googlemail.com>
Date: Fri, 4 Nov 2022 16:05:43 +0100
Subject: [PATCH 2/5] rm json, rm explicit file type, clean

Signed-off-by: Max Pumperla <max.pumperla@googlemail.com>
---
 rllib/common.py                               | 19 +++++++++++++++----
 rllib/scripts.py                              |  3 +--
 rllib/tests/test_rllib_train_and_evaluate.py  |  8 +-------
 rllib/train.py                                | 11 +++++------
 rllib/tuned_examples/a2c/cartpole-a2c.json    | 15 ---------------
 rllib/tuned_examples/a2c/cartpole_a2c.py      |  2 +-
 rllib/tuned_examples/a3c/cartpole_a3c.py      |  2 +-
 .../simple_q/cartpole_simpleq_test.py         |  2 +-
 8 files changed, 25 insertions(+), 37 deletions(-)
 delete mode 100644 rllib/tuned_examples/a2c/cartpole-a2c.json

diff --git a/rllib/common.py b/rllib/common.py
index a998fe5a9989..0c3fc96020e4 100644
--- a/rllib/common.py
+++ b/rllib/common.py
@@ -24,10 +24,23 @@ class SupportedFileType(str, Enum):
     """Supported file types for RLlib, used for CLI argument validation."""
 
     yaml = "yaml"
-    json = "json"
     python = "python"
 
 
+def get_file_type(config_file: str) -> SupportedFileType:
+    """Map a config file name to its type; raise ValueError if unsupported."""
+    # NOTE(review): substring match, so ".py" anywhere in the path wins.
+    if ".py" in config_file:
+        return SupportedFileType.python
+    if ".yaml" in config_file or ".yml" in config_file:
+        return SupportedFileType.yaml
+    raise ValueError(
+        "Unknown file type for config "
+        "file: {}. Supported extensions: .py, "
+        ".yml, .yaml.".format(config_file)
+    )
+
+
 def _create_tune_parser_help():
     """Create a Tune dummy parser to access its 'help' docstrings."""
     parser = _make_parser(
@@ -105,7 +118,7 @@ def get_help(key: str) -> str:
     "`ray.rllib.examples.env.simple_corridor.SimpleCorridor`).",
     config_file="Use the algorithm configuration from this file.",
     filetype="The file type of the config file. Defaults to 'yaml' and can also be "
-    "'json', or 'python'.",
+    "'python'.",
     experiment_name="Name of the subdirectory under `local_dir` to put results in.",
     framework="The identifier of the deep learning framework you want to use."
     "Choose between TensorFlow 1.x ('tf'), TensorFlow 2.x ('tf2'), "
@@ -252,7 +265,6 @@ class CLIArguments:
     },
     "cartpole-a2c": {
         "file": "tuned_examples/a2c/cartpole_a2c.py",
-        "file_type": SupportedFileType.python,
         "stop": "{'timesteps_total': 50000, 'episode_reward_mean': 200}",
         "description": "Runs A2C on the CartPole-v1 environment.",
     },
@@ -263,7 +275,6 @@ class CLIArguments:
     # A3C
     "cartpole-a3c": {
         "file": "tuned_examples/a3c/cartpole_a3c.py",
-        "file_type": SupportedFileType.python,
         "stop": "{'timesteps_total': 20000, 'episode_reward_mean': 150}",
         "description": "Runs A3C on the CartPole-v1 environment.",
     },
diff --git a/rllib/scripts.py b/rllib/scripts.py
index 6bc23f3d8d9a..777c5e1f7540 100644
--- a/rllib/scripts.py
+++ b/rllib/scripts.py
@@ -13,6 +13,7 @@
     SupportedFileType,
     example_help,
     download_example_file,
+    get_file_type,
 )
 
 # Main Typer CLI app
@@ -101,12 +102,10 @@ def run(example_id: str = typer.Argument(..., help="Example ID to run.")):
     example = EXAMPLES[example_id]
     example_file = get_example_file(example_id)
     example_file, temp_file = download_example_file(example_file)
-    file_type = example.get("file_type", SupportedFileType.yaml)
     stop = example.get("stop", "{}")
 
     train_module.file(
         config_file=example_file,
-        file_type=file_type,
         stop=stop,
         checkpoint_freq=1,
         checkpoint_at_end=True,
diff --git a/rllib/tests/test_rllib_train_and_evaluate.py b/rllib/tests/test_rllib_train_and_evaluate.py
index 52063d0c51e2..a7bf73b74acf 100644
--- a/rllib/tests/test_rllib_train_and_evaluate.py
+++ b/rllib/tests/test_rllib_train_and_evaluate.py
@@ -289,16 +289,10 @@ def test_yaml_run(self):
             f"cartpole-simpleq-test.yaml"
         ).read()
 
-    def test_json_run(self):
-        assert os.popen(
-            f"python {rllib_dir}/scripts.py train file tuned_examples/simple_q/"
-            f"cartpole-simpleq-test.json --type=json"
-        ).read()
-
     def test_python_run(self):
         assert os.popen(
             f"python {rllib_dir}/scripts.py train file tuned_examples/simple_q/"
-            f"cartpole_simpleq_test.py --type=python "
+            f"cartpole_simpleq_test.py "
             f"--stop={'timesteps_total': 50000, 'episode_reward_mean': 200}"
         ).read()
 
diff --git a/rllib/train.py b/rllib/train.py
index 8a9c14ae586e..67d4ded2a25a 100755
--- a/rllib/train.py
+++ b/rllib/train.py
@@ -12,7 +12,8 @@
 from ray.tune.schedulers import create_scheduler
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.common import CLIArguments as cli
-from ray.rllib.common import FrameworkEnum, SupportedFileType, download_example_file
+from ray.rllib.common import FrameworkEnum, SupportedFileType
+from ray.rllib.common import download_example_file, get_file_type
 
 
 def import_backends():
@@ -53,16 +54,13 @@ def _patch_path(path: str):
 def load_experiments_from_file(
     config_file: str, file_type: SupportedFileType, checkpoint_config: dict, stop: str
 ) -> dict:
-    """Load experiments from a file. Supports YAML, JSON and Python files.
+    """Load experiments from a file. Supports YAML and Python files.
     If you want to use a Python file, it has to have a 'config' variable
     that is an AlgorithmConfig object."""
 
     if file_type == SupportedFileType.yaml:
         with open(config_file) as f:
             experiments = yaml.safe_load(f)
-    elif file_type == SupportedFileType.json:
-        with open(config_file) as f:
-            experiments = json.load(f)
     else:  # Python file case (ensured by file type enum)
         import importlib
 
@@ -101,7 +99,6 @@ def load_experiments_from_file(
 def file(
     # File-based arguments.
     config_file: str = cli.ConfigFile,
-    file_type: SupportedFileType = cli.FileType,
     # stopping conditions
     stop: str = cli.Stop,
     # Checkpointing
@@ -152,6 +149,8 @@ def file(
         "checkpoint_score_attribute": checkpoint_score_attr,
     }
 
+    file_type = get_file_type(config_file)
+
     experiments = load_experiments_from_file(
         config_file, file_type, checkpoint_config, stop
     )
diff --git a/rllib/tuned_examples/a2c/cartpole-a2c.json b/rllib/tuned_examples/a2c/cartpole-a2c.json
deleted file mode 100644
index bfe5781f92ee..000000000000
--- a/rllib/tuned_examples/a2c/cartpole-a2c.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "cartpole-a2c":{
-    "env":"CartPole-v0",
-    "run":"A2C",
-    "stop":{
-      "episode_reward_mean":150,
-      "timesteps_total":500000
-    },
-    "config":{
-      "framework":"tf",
-      "num_workers":0,
-      "lr":0.001
-    }
-  }
-}
\ No newline at end of file
diff --git a/rllib/tuned_examples/a2c/cartpole_a2c.py b/rllib/tuned_examples/a2c/cartpole_a2c.py
index 743bec365e4c..e48d8c466e09 100644
--- a/rllib/tuned_examples/a2c/cartpole_a2c.py
+++ b/rllib/tuned_examples/a2c/cartpole_a2c.py
@@ -1,5 +1,5 @@
 # Run with:
-# rllib train -f cartpole_a2c.py --type python \
+# rllib train file cartpole_a2c.py \
 #     --stop={'timesteps_total': 50000, 'episode_reward_mean': 200}"
 from ray.rllib.algorithms.a2c import A2CConfig
 
diff --git a/rllib/tuned_examples/a3c/cartpole_a3c.py b/rllib/tuned_examples/a3c/cartpole_a3c.py
index 0d8dd516c30a..464f1ebcb9f8 100644
--- a/rllib/tuned_examples/a3c/cartpole_a3c.py
+++ b/rllib/tuned_examples/a3c/cartpole_a3c.py
@@ -1,5 +1,5 @@
 # Run with:
-# rllib train -f cartpole_a3c.py --type python \
+# rllib train file cartpole_a3c.py \
 #     --stop={'timesteps_total': 20000, 'episode_reward_mean': 150}"
 from ray.rllib.algorithms.a3c import A3CConfig
 
diff --git a/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py b/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py
index db24e2612c13..ba2816968870 100644
--- a/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py
+++ b/rllib/tuned_examples/simple_q/cartpole_simpleq_test.py
@@ -1,5 +1,5 @@
 # Run with:
-# rllib train -f cartpole_simpleq_test.py --type python \
+# rllib train file cartpole_simpleq_test.py \
 #     --stop={'timesteps_total': 50000, 'episode_reward_mean': 200}"
 from ray.rllib.algorithms.simple_q import SimpleQConfig
 

From 1c870b172b8ca4cbda8b07d415026867e49b5c26 Mon Sep 17 00:00:00 2001
From: Max Pumperla <max.pumperla@googlemail.com>
Date: Fri, 4 Nov 2022 16:39:14 +0100
Subject: [PATCH 3/5] lint

Signed-off-by: Max Pumperla <max.pumperla@googlemail.com>
---
 rllib/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rllib/train.py b/rllib/train.py
index 0d23a45ddb6c..6a1dad12aa43 100644
--- a/rllib/train.py
+++ b/rllib/train.py
@@ -54,8 +54,8 @@ def _patch_path(path: str):
 def load_experiments_from_file(
     config_file: str,
     file_type: SupportedFileType,
+    stop: str,
     checkpoint_config: Optional[dict] = None,
-    stop: str
 ) -> dict:
     """Load experiments from a file. Supports YAML and Python files.
     If you want to use a Python file, it has to have a 'config' variable
@@ -155,7 +155,7 @@ def file(
     file_type = get_file_type(config_file)
 
     experiments = load_experiments_from_file(
-        config_file, file_type, checkpoint_config, stop
+        config_file, file_type, stop, checkpoint_config
     )
     exp_name = list(experiments.keys())[0]
     algo = experiments[exp_name]["run"]

From e209664832b2ef5e1b604a267305a1f3ed4573a9 Mon Sep 17 00:00:00 2001
From: Max Pumperla <max.pumperla@googlemail.com>
Date: Fri, 4 Nov 2022 16:56:20 +0100
Subject: [PATCH 4/5] make stop optional

Signed-off-by: Max Pumperla <max.pumperla@googlemail.com>
---
 rllib/train.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rllib/train.py b/rllib/train.py
index 6a1dad12aa43..bb36a6ee1cfd 100644
--- a/rllib/train.py
+++ b/rllib/train.py
@@ -54,7 +54,7 @@ def _patch_path(path: str):
 def load_experiments_from_file(
     config_file: str,
     file_type: SupportedFileType,
-    stop: str,
+    stop: Optional[str] = None,
     checkpoint_config: Optional[dict] = None,
 ) -> dict:
     """Load experiments from a file. Supports YAML and Python files.
@@ -90,7 +90,8 @@ def load_experiments_from_file(
         }
 
         # Add a stopping condition if provided
-        experiments["default"]["stop"] = json.loads(stop)
+        if stop:
+            experiments["default"]["stop"] = json.loads(stop)
 
     for key, val in experiments.items():
         experiments[key]["checkpoint_config"] = checkpoint_config or {}

From 79d5b2938539745dd41142fbd63ed844254b1c45 Mon Sep 17 00:00:00 2001
From: Max Pumperla <max.pumperla@googlemail.com>
Date: Fri, 4 Nov 2022 17:40:17 +0100
Subject: [PATCH 5/5] rm unused imports

Signed-off-by: Max Pumperla <max.pumperla@googlemail.com>
---
 rllib/scripts.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/rllib/scripts.py b/rllib/scripts.py
index 777c5e1f7540..b20b97c9d146 100644
--- a/rllib/scripts.py
+++ b/rllib/scripts.py
@@ -10,10 +10,8 @@
 from ray.rllib.common import (
     EXAMPLES,
     FrameworkEnum,
-    SupportedFileType,
     example_help,
     download_example_file,
-    get_file_type,
 )
 
 # Main Typer CLI app