From 3421c1840c346e5733701184695932dfda1d1e98 Mon Sep 17 00:00:00 2001
From: Sven Mika
Date: Thu, 10 Oct 2024 18:12:38 +0200
Subject: [PATCH] [RLlib] Cleanup examples folder vol. 25: Remove some old API stack examples. (#47970)

Signed-off-by: ujjawal-khare
---
 doc/source/rllib/rllib-examples.rst           |   4 -
 doc/source/rllib/rllib-models.rst             |  56 +-----
 rllib/BUILD                                   | 175 ------------------
 rllib/examples/cartpole_lstm.py               |  94 ----------
 rllib/examples/custom_model_api.py            | 109 -----------
 .../examples/custom_model_loss_and_metrics.py | 117 ------------
 rllib/examples/learners/ppo_tuner.py          |  61 ------
 7 files changed, 2 insertions(+), 614 deletions(-)
 delete mode 100644 rllib/examples/cartpole_lstm.py
 delete mode 100644 rllib/examples/custom_model_api.py
 delete mode 100644 rllib/examples/custom_model_loss_and_metrics.py
 delete mode 100644 rllib/examples/learners/ppo_tuner.py

diff --git a/doc/source/rllib/rllib-examples.rst b/doc/source/rllib/rllib-examples.rst
index 6457ebd17187..5a2c4dca69f6 100644
--- a/doc/source/rllib/rllib-examples.rst
+++ b/doc/source/rllib/rllib-examples.rst
@@ -254,12 +254,8 @@ RLModules
 - |old_stack| `How to use the "Repeated" space of RLlib for variable length observations `__:
   How to use RLlib's `Repeated` space to handle variable length observations.
 - |old_stack| `How to write a custom Keras model `__:
-  Example of using a custom Keras model.
-- |old_stack| `How to register a custom model with supervised loss `__:
   Example of defining and registering a custom model with a supervised loss.
 - |old_stack| `How to train with batch normalization `__:
-  Example of adding batch norm layers to a custom model.
-- |old_stack| `How to write a custom model with its custom API `__:
   Shows how to define a custom Model API in RLlib, such that it can be used inside certain algorithms.
 - |old_stack| `How to write a "trajectory view API" utilizing model `__:
   An example on how a model can use the trajectory view API to specify its own input.
diff --git a/doc/source/rllib/rllib-models.rst b/doc/source/rllib/rllib-models.rst
index 5e3badd3b8e3..717c6bb196c6 100644
--- a/doc/source/rllib/rllib-models.rst
+++ b/doc/source/rllib/rllib-models.rst
@@ -364,59 +364,7 @@ calculating head on top of your policy model.
 In order to expand a Model's API, define and implement a new method (e.g. ``get_q_values()``)
 in your TF- or TorchModelV2 sub-class.
 You can now wrap this new API either around RLlib's default models or around
-your custom (``forward()``-overriding) model classes. Here are two examples that illustrate how to do this:
-
-**The Q-head API: Adding a dueling layer on top of a default RLlib model**.
-
-The following code adds a ``get_q_values()`` method to the automatically chosen
-default Model (e.g. a ``FullyConnectedNetwork`` if the observation space is a 1D Box
-or Discrete):
-
-.. literalinclude:: ../../../rllib/examples/_old_api_stack/models/custom_model_api.py
-   :language: python
-   :start-after: __sphinx_doc_model_api_1_begin__
-   :end-before: __sphinx_doc_model_api_1_end__
-
-Now, for your algorithm that needs to have this model API to work properly (e.g. DQN),
-you use this following code to construct the complete final Model using the
-``ModelCatalog.get_model_v2`` factory function (`code here `__):
-
-.. literalinclude:: ../../../rllib/examples/custom_model_api.py
-   :language: python
-   :start-after: __sphinx_doc_model_construct_1_begin__
-   :end-before: __sphinx_doc_model_construct_1_end__
-
-With the model object constructed above, you can get the underlying intermediate output (before the dueling head)
-by calling ``my_dueling_model`` directly (``out = my_dueling_model([input_dict])``), and then passing ``out`` into
-your custom ``get_q_values`` method: ``q_values = my_dueling_model.get_q_values(out)``.
-
-
-**The single Q-value API for SAC**.
-
-Our DQN model from above takes an observation and outputs one Q-value per (discrete) action.
-Continuous SAC - on the other hand - uses Models that calculate one Q-value only
-for a single (**continuous**) action, given an observation and that particular action.
-
-Let's take a look at how we would construct this API and wrap it around a custom model:
-
-.. literalinclude:: ../../../rllib/examples/_old_api_stack/models/custom_model_api.py
-   :language: python
-   :start-after: __sphinx_doc_model_api_2_begin__
-   :end-before: __sphinx_doc_model_api_2_end__
-
-Now, for your algorithm that needs to have this model API to work properly (e.g. SAC),
-you use this following code to construct the complete final Model using the
-``ModelCatalog.get_model_v2`` factory function (`code here `__):
-
-.. literalinclude:: ../../../rllib/examples/custom_model_api.py
-   :language: python
-   :start-after: __sphinx_doc_model_construct_2_begin__
-   :end-before: __sphinx_doc_model_construct_2_end__
-
-With the model object constructed above, you can get the underlying intermediate output (before the q-head)
-by calling ``my_cont_action_q_model`` directly (``out = my_cont_action_q_model([input_dict])``), and then passing ``out``
-and some action into your custom ``get_single_q_value`` method:
-``q_value = my_cont_action_q_model.get_signle_q_value(out, action)``.
+your custom (``forward()``-overriding) model classes.
 
 
 More examples for Building Custom Models
@@ -505,7 +453,7 @@ Supervised Model Losses
 
 You can mix supervised losses into any RLlib algorithm through custom models. For example, you can add an imitation learning loss on expert experiences, or a self-supervised autoencoder loss within the model. These losses can be defined over either policy evaluation inputs, or data read from `offline storage `__.
 
-**TensorFlow**: To add a supervised loss to a custom TF model, you need to override the ``custom_loss()`` method. This method takes in the existing policy loss for the algorithm, which you can add your own supervised loss to before returning. For debugging, you can also return a dictionary of scalar tensors in the ``metrics()`` method. Here is a `runnable example `__ of adding an imitation loss to CartPole training that is defined over a `offline dataset `__.
+**TensorFlow**: To add a supervised loss to a custom TF model, you need to override the ``custom_loss()`` method. This method takes in the existing policy loss for the algorithm, which you can add your own supervised loss to before returning. For debugging, you can also return a dictionary of scalar tensors in the ``metrics()`` method.
 
 **PyTorch**: There is no explicit API for adding losses to custom torch models. However, you can modify the loss in the policy definition directly. Like for TF models, offline datasets can be incorporated by creating an input reader and calling ``reader.next()`` in the loss forward pass.
 
diff --git a/rllib/BUILD b/rllib/BUILD
index 469a5c57a950..7c2259b84768 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -2836,107 +2836,6 @@ py_test(
     args = ["--enable-new-api-stack", "--as-test"]
 )
 
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_local_cpu_torch",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=torch", "--config=local-cpu"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_local_cpu_tf2",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=tf2", "--config=local-cpu"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_local_gpu_torch",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples", "gpu"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=torch", "--config=local-gpu"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_local_gpu_tf2",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples", "gpu", "exclusive"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=tf2", "--config=local-gpu"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_remote_cpu_torch",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=torch", "--config=remote-cpu"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_remote_cpu_tf2",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=tf2", "--config=remote-cpu"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_remote_gpu_torch",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples", "gpu", "exclusive"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=torch", "--config=remote-gpu"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_remote_gpu_tf2",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples", "gpu", "exclusive"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=tf2", "--config=remote-gpu"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_multi_gpu_torch",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples", "multi_gpu", "exclusive"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=torch", "--config=multi-gpu-ddp"]
-)
-
-#@OldAPIStack @HybridAPIStack
-py_test(
-    name = "examples/learners/ppo_tuner_multi_gpu_tf2",
-    main = "examples/learners/ppo_tuner.py",
-    tags = ["team:rllib", "examples", "multi_gpu", "exclusive"],
-    size = "medium",
-    srcs = ["examples/learners/ppo_tuner.py"],
-    args = ["--framework=tf2", "--config=multi-gpu-ddp"]
-)
-
 # subdirectory: multi_agent/
 # ....................................
 py_test(
@@ -3256,56 +3155,6 @@ py_test(
     args = ["--as-test", "--framework=torch", "--stop-reward=-0.012", "--num-cpus=4"]
 )
 
-#@OldAPIStack
-py_test(
-    name = "examples/cartpole_lstm_impala_tf2",
-    main = "examples/cartpole_lstm.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "medium",
-    srcs = ["examples/cartpole_lstm.py"],
-    args = ["--run=IMPALA", "--as-test", "--framework=tf2", "--stop-reward=28", "--num-cpus=4"]
-)
-
-#@OldAPIStack
-py_test(
-    name = "examples/cartpole_lstm_impala_torch",
-    main = "examples/cartpole_lstm.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "medium",
-    srcs = ["examples/cartpole_lstm.py"],
-    args = ["--run=IMPALA", "--as-test", "--framework=torch", "--stop-reward=28", "--num-cpus=4"]
-)
-
-#@OldAPIStack
-py_test(
-    name = "examples/cartpole_lstm_ppo_tf2",
-    main = "examples/cartpole_lstm.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "large",
-    srcs = ["examples/cartpole_lstm.py"],
-    args = ["--run=PPO", "--as-test", "--framework=tf2", "--stop-reward=28", "--num-cpus=4"]
-)
-
-#@OldAPIStack
-py_test(
-    name = "examples/cartpole_lstm_ppo_torch",
-    main = "examples/cartpole_lstm.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "medium",
-    srcs = ["examples/cartpole_lstm.py"],
-    args = ["--run=PPO", "--as-test", "--framework=torch", "--stop-reward=28", "--num-cpus=4"]
-)
-
-#@OldAPIStack
-py_test(
-    name = "examples/cartpole_lstm_ppo_torch_with_prev_a_and_r",
-    main = "examples/cartpole_lstm.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "medium",
-    srcs = ["examples/cartpole_lstm.py"],
-    args = ["--run=PPO", "--as-test", "--framework=torch", "--stop-reward=28", "--num-cpus=4", "--use-prev-action", "--use-prev-reward"]
-)
-
 #@OldAPIStack
 py_test(
     name = "examples/centralized_critic_tf",
@@ -3356,30 +3205,6 @@ py_test(
     args = ["--stop-iters=2"]
 )
 
-#@OldAPIStack
-py_test(
-    name = "examples/custom_model_loss_and_metrics_ppo_tf",
-    main = "examples/custom_model_loss_and_metrics.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "small",
-    # Include the json data file.
-    data = ["tests/data/cartpole/small.json"],
-    srcs = ["examples/custom_model_loss_and_metrics.py"],
-    args = ["--run=PPO", "--stop-iters=1", "--framework=tf","--input-files=tests/data/cartpole"]
-)
-
-#@OldAPIStack
-py_test(
-    name = "examples/custom_model_loss_and_metrics_ppo_torch",
-    main = "examples/custom_model_loss_and_metrics.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "small",
-    # Include the json data file.
-    data = ["tests/data/cartpole/small.json"],
-    srcs = ["examples/custom_model_loss_and_metrics.py"],
-    args = ["--run=PPO", "--framework=torch", "--stop-iters=1", "--input-files=tests/data/cartpole"]
-)
-
 py_test(
     name = "examples/custom_recurrent_rnn_tokenizer_repeat_after_me_tf2",
     main = "examples/custom_recurrent_rnn_tokenizer.py",
diff --git a/rllib/examples/cartpole_lstm.py b/rllib/examples/cartpole_lstm.py
deleted file mode 100644
index c7454161ab06..000000000000
--- a/rllib/examples/cartpole_lstm.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# @OldAPIStack
-
-# TODO (sven): Move this script to `examples/rl_modules/...`
-
-import argparse
-import os
-
-from ray.air.constants import TRAINING_ITERATION
-from ray.rllib.examples.envs.classes.stateless_cartpole import StatelessCartPole
-from ray.rllib.utils.metrics import (
-    ENV_RUNNER_RESULTS,
-    EPISODE_RETURN_MEAN,
-    NUM_ENV_STEPS_SAMPLED_LIFETIME,
-)
-from ray.rllib.utils.test_utils import check_learning_achieved
-from ray.tune.registry import get_trainable_cls
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "--run", type=str, default="PPO", help="The RLlib-registered algorithm to use."
-)
-parser.add_argument("--num-cpus", type=int, default=0)
-parser.add_argument(
-    "--framework",
-    choices=["tf", "tf2", "torch"],
-    default="torch",
-    help="The DL framework specifier.",
-)
-parser.add_argument("--use-prev-action", action="store_true")
-parser.add_argument("--use-prev-reward", action="store_true")
-parser.add_argument(
-    "--as-test",
-    action="store_true",
-    help="Whether this script should be run as a test: --stop-reward must "
-    "be achieved within --stop-timesteps AND --stop-iters.",
-)
-parser.add_argument(
-    "--stop-iters", type=int, default=200, help="Number of iterations to train."
-)
-parser.add_argument(
-    "--stop-timesteps", type=int, default=100000, help="Number of timesteps to train."
-)
-parser.add_argument(
-    "--stop-reward", type=float, default=150.0, help="Reward at which we stop training."
-)
-
-if __name__ == "__main__":
-    import ray
-    from ray import air, tune
-
-    args = parser.parse_args()
-
-    ray.init()
-
-    algo_cls = get_trainable_cls(args.run)
-    config = algo_cls.get_default_config()
-
-    config.environment(env=StatelessCartPole).resources(
-        num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))
-    ).framework(args.framework).reporting(min_time_s_per_iteration=0.1).training(
-        model={
-            "use_lstm": True,
-            "lstm_cell_size": 32,
-            "lstm_use_prev_action": args.use_prev_action,
-            "lstm_use_prev_reward": args.use_prev_reward,
-        }
-    )
-
-    if args.run == "PPO":
-        config.training(num_epochs=5, vf_loss_coeff=0.0001, train_batch_size=512)
-        config.model["vf_share_layers"] = True
-    elif args.run == "IMPALA":
-        config.env_runners(num_env_runners=2)
-        config.resources(num_gpus=0)
-        config.training(vf_loss_coeff=0.01)
-
-    stop = {
-        TRAINING_ITERATION: args.stop_iters,
-        NUM_ENV_STEPS_SAMPLED_LIFETIME: args.stop_timesteps,
-        f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": args.stop_reward,
-    }
-
-    tuner = tune.Tuner(
-        args.run,
-        param_space=config.to_dict(),
-        run_config=air.RunConfig(
-            stop=stop,
-        ),
-    )
-    results = tuner.fit()
-
-    if args.as_test:
-        check_learning_achieved(results, args.stop_reward)
-    ray.shutdown()
diff --git a/rllib/examples/custom_model_api.py b/rllib/examples/custom_model_api.py
deleted file mode 100644
index e1e6705bbf77..000000000000
--- a/rllib/examples/custom_model_api.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# @OldAPIStack
-import argparse
-from gymnasium.spaces import Box, Discrete
-import numpy as np
-
-from ray.rllib.examples._old_api_stack.models.custom_model_api import (
-    DuelingQModel,
-    TorchDuelingQModel,
-    ContActionQModel,
-    TorchContActionQModel,
-)
-from ray.rllib.models.catalog import ModelCatalog, MODEL_DEFAULTS
-from ray.rllib.policy.sample_batch import SampleBatch
-from ray.rllib.utils.framework import try_import_tf, try_import_torch
-
-tf1, tf, tfv = try_import_tf()
-torch, _ = try_import_torch()
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "--framework",
-    choices=["tf", "tf2", "torch"],
-    default="torch",
-    help="The DL framework specifier.",
-)
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-
-    # Test API wrapper for dueling Q-head.
-
-    obs_space = Box(-1.0, 1.0, (3,))
-    action_space = Discrete(3)
-
-    # Run in eager mode for value checking and debugging.
-    tf1.enable_eager_execution()
-
-    # __sphinx_doc_model_construct_1_begin__
-    my_dueling_model = ModelCatalog.get_model_v2(
-        obs_space=obs_space,
-        action_space=action_space,
-        num_outputs=action_space.n,
-        model_config=MODEL_DEFAULTS,
-        framework=args.framework,
-        # Providing the `model_interface` arg will make the factory
-        # wrap the chosen default model with our new model API class
-        # (DuelingQModel). This way, both `forward` and `get_q_values`
-        # are available in the returned class.
-        model_interface=DuelingQModel
-        if args.framework != "torch"
-        else TorchDuelingQModel,
-        name="dueling_q_model",
-    )
-    # __sphinx_doc_model_construct_1_end__
-
-    batch_size = 10
-    input_ = np.array([obs_space.sample() for _ in range(batch_size)])
-    # Note that for PyTorch, you will have to provide torch tensors here.
-    if args.framework == "torch":
-        input_ = torch.from_numpy(input_)
-
-    input_dict = SampleBatch(obs=input_, _is_training=False)
-    out, state_outs = my_dueling_model(input_dict=input_dict)
-    assert out.shape == (10, 256)
-    # Pass `out` into `get_q_values`
-    q_values = my_dueling_model.get_q_values(out)
-    assert q_values.shape == (10, action_space.n)
-
-    # Test API wrapper for single value Q-head from obs/action input.
-
-    obs_space = Box(-1.0, 1.0, (3,))
-    action_space = Box(-1.0, -1.0, (2,))
-
-    # __sphinx_doc_model_construct_2_begin__
-    my_cont_action_q_model = ModelCatalog.get_model_v2(
-        obs_space=obs_space,
-        action_space=action_space,
-        num_outputs=2,
-        model_config=MODEL_DEFAULTS,
-        framework=args.framework,
-        # Providing the `model_interface` arg will make the factory
-        # wrap the chosen default model with our new model API class
-        # (DuelingQModel). This way, both `forward` and `get_q_values`
-        # are available in the returned class.
-        model_interface=ContActionQModel
-        if args.framework != "torch"
-        else TorchContActionQModel,
-        name="cont_action_q_model",
-    )
-    # __sphinx_doc_model_construct_2_end__
-
-    batch_size = 10
-    input_ = np.array([obs_space.sample() for _ in range(batch_size)])
-
-    # Note that for PyTorch, you will have to provide torch tensors here.
-    if args.framework == "torch":
-        input_ = torch.from_numpy(input_)
-
-    input_dict = SampleBatch(obs=input_, _is_training=False)
-    # Note that for PyTorch, you will have to provide torch tensors here.
-    out, state_outs = my_cont_action_q_model(input_dict=input_dict)
-    assert out.shape == (10, 256)
-    # Pass `out` and an action into `my_cont_action_q_model`
-    action = np.array([action_space.sample() for _ in range(batch_size)])
-    if args.framework == "torch":
-        action = torch.from_numpy(action)
-
-    q_value = my_cont_action_q_model.get_single_q_value(out, action)
-    assert q_value.shape == (10, 1)
diff --git a/rllib/examples/custom_model_loss_and_metrics.py b/rllib/examples/custom_model_loss_and_metrics.py
deleted file mode 100644
index ccb3d8e1acd0..000000000000
--- a/rllib/examples/custom_model_loss_and_metrics.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# @OldAPIStack
-
-# Users should just inherit the Learner and extend the loss_fn.
-# TODO (sven): Move this example script to `examples/learners/...`
-
-"""Example of using custom_loss() with an imitation learning loss under the Policy
-and ModelV2 API.
-
-The default input file is too small to learn a good policy, but you can
-generate new experiences for IL training as follows:
-
-To generate experiences:
-$ ./train.py --run=PG --config='{"output": "/tmp/cartpole"}' --env=CartPole-v1
-
-To train on experiences with joint PG + IL loss:
-$ python custom_loss.py --input-files=/tmp/cartpole
-"""
-
-import argparse
-from pathlib import Path
-import os
-
-import ray
-from ray import air, tune
-from ray.air.constants import TRAINING_ITERATION
-from ray.rllib.core import DEFAULT_MODULE_ID
-from ray.rllib.examples._old_api_stack.models.custom_loss_model import (
-    CustomLossModel,
-    TorchCustomLossModel,
-)
-from ray.rllib.models import ModelCatalog
-from ray.rllib.utils.framework import try_import_tf
-from ray.rllib.utils.metrics.learner_info import LEARNER_INFO, LEARNER_STATS_KEY
-from ray.tune.registry import get_trainable_cls
-
-tf1, tf, tfv = try_import_tf()
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "--run", type=str, default="PPO", help="The RLlib-registered algorithm to use."
-)
-parser.add_argument(
-    "--framework",
-    choices=["tf", "tf2", "torch"],
-    default="torch",
-    help="The DL framework specifier.",
-)
-parser.add_argument("--stop-iters", type=int, default=200)
-parser.add_argument(
-    "--input-files",
-    type=str,
-    default=os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "../tests/data/cartpole/small.json"
-    ),
-)
-
-if __name__ == "__main__":
-    ray.init()
-    args = parser.parse_args()
-
-    # Bazel makes it hard to find files specified in `args` (and `data`).
-    # Look for them here.
-    if not os.path.exists(args.input_files):
-        # This script runs in the ray/rllib/examples dir.
-        rllib_dir = Path(__file__).parent.parent
-        input_dir = rllib_dir.absolute().joinpath(args.input_files)
-        args.input_files = str(input_dir)
-
-    ModelCatalog.register_custom_model(
-        "custom_loss",
-        TorchCustomLossModel if args.framework == "torch" else CustomLossModel,
-    )
-
-    config = (
-        get_trainable_cls(args.run)
-        .get_default_config()
-        .environment("CartPole-v1")
-        .framework(args.framework)
-        .env_runners(num_env_runners=0)
-        .training(
-            model={
-                "custom_model": "custom_loss",
-                "custom_model_config": {
-                    "input_files": args.input_files,
-                },
-            },
-        )
-        # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
-        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
-    )
-
-    stop = {TRAINING_ITERATION: args.stop_iters}
-
-    tuner = tune.Tuner(
-        args.run,
-        param_space=config,
-        run_config=air.RunConfig(stop=stop, verbose=1),
-    )
-    results = tuner.fit()
-    info = results.get_best_result().metrics["info"]
-
-    # Torch metrics structure.
-    if args.framework == "torch":
-        assert LEARNER_STATS_KEY in info[LEARNER_INFO][DEFAULT_MODULE_ID]
-        assert "model" in info[LEARNER_INFO][DEFAULT_MODULE_ID]
-        assert "custom_metrics" in info[LEARNER_INFO][DEFAULT_MODULE_ID]
-
-    # TODO: (sven) Make sure the metrics structure gets unified between
-    #  tf and torch. Tf should work like current torch:
-    #  info:
-    #    learner:
-    #      [policy_id]
-    #        learner_stats: [return values of policy's `stats_fn`]
-    #        model: [return values of ModelV2's `metrics` method]
-    #        custom_metrics: [return values of callback: `on_learn_on_batch`]
-    else:
-        assert "model" in info[LEARNER_INFO][DEFAULT_MODULE_ID][LEARNER_STATS_KEY]
diff --git a/rllib/examples/learners/ppo_tuner.py b/rllib/examples/learners/ppo_tuner.py
deleted file mode 100644
index a27e292b9efa..000000000000
--- a/rllib/examples/learners/ppo_tuner.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import argparse
-
-import ray
-from ray import air, tune
-from ray.air.constants import TRAINING_ITERATION
-from ray.rllib.algorithms.ppo import PPOConfig
-
-LEARNER_CONFIG = {
-    "remote-cpu": {"num_learners": 1},
-    "remote-gpu": {"num_learners": 1, "num_gpus_per_learner": 1},
-    "multi-gpu-ddp": {
-        "num_learners": 2,
-        "num_gpus_per_learner": 1,
-    },
-    "local-cpu": {},
-    "local-gpu": {"num_gpus_per_learner": 1},
-}
-
-
-def _parse_args():
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--config",
-        type=str,
-        default="local-cpu",
-    )
-
-    parser.add_argument(
-        "--framework",
-        choices=["tf2", "torch"],  # tf will be deprecated with the new Learner stack
-        default="torch",
-    )
-
-    return parser.parse_args()
-
-
-if __name__ == "__main__":
-    args = _parse_args()
-
-    ray.init()
-
-    config = (
-        PPOConfig()
-        .framework(args.framework)
-        .environment("CartPole-v1")
-        .learners(**LEARNER_CONFIG[args.config])
-    )
-
-    print("Testing with learner config: ", LEARNER_CONFIG[args.config])
-    print("Testing with framework: ", args.framework)
-    print("-" * 80)
-    tuner = tune.Tuner(
-        "PPO",
-        param_space=config.to_dict(),
-        run_config=air.RunConfig(
-            stop={TRAINING_ITERATION: 1},
-            failure_config=air.FailureConfig(fail_fast="raise"),
-        ),
-    )
-    tuner.fit()