
[RLlib] Cleanup examples folder 07: Translate custom_experiment, custom_logger, custom_progress_reporter to new API stack. #44735

11 changes: 3 additions & 8 deletions rllib/BUILD
@@ -2834,33 +2834,28 @@ py_test(

# subdirectory: ray_tune/
# ....................................
#@OldAPIStack
py_test(
name = "examples/ray_tune/custom_experiment",
main = "examples/ray_tune/custom_experiment.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "medium",
srcs = ["examples/ray_tune/custom_experiment.py"],
args = ["--train-iterations=10"]
)

#@OldAPIStack
py_test(
name = "examples/ray_tune/custom_logger",
main = "examples/ray_tune/custom_logger.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "medium",
srcs = ["examples/ray_tune/custom_logger.py"],
args = ["--stop-iters=3"]
)

#@OldAPIStack
py_test(
name = "examples/ray_tune/custom_train_function",
main = "examples/ray_tune/custom_train_function.py",
name = "examples/ray_tune/custom_progress_reporter",
main = "examples/ray_tune/custom_progress_reporter.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "medium",
srcs = ["examples/ray_tune/custom_train_function.py"],
srcs = ["examples/ray_tune/custom_progress_reporter.py"],
)

# subdirectory: rl_modules/
4 changes: 4 additions & 0 deletions rllib/algorithms/algorithm.py
@@ -2328,6 +2328,10 @@ def load_checkpoint(self, checkpoint_dir: str) -> None:
if self.config._enable_new_api_stack:
learner_state_dir = os.path.join(checkpoint_dir, "learner")
self.learner_group.load_state(learner_state_dir)
# Also make sure that all training EnvRunners get the just loaded weights.
weights = self.learner_group.get_weights()
self.workers.local_worker().set_weights(weights)
self.workers.sync_weights()

# Call the `on_checkpoint_loaded` callback.
self.callbacks.on_checkpoint_loaded(algorithm=self)
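To illustrate the effect of the change above, here is a minimal, hedged usage sketch (not part of this PR): restoring an `Algorithm` from a checkpoint on the new API stack now also pushes the restored Learner weights to the local worker and all remote EnvRunners, so training resumes with consistent weights. The checkpoint path below is hypothetical.

```python
# Minimal usage sketch (assumption: a checkpoint was saved earlier under a
# hypothetical path; this is not code from this PR).
from ray.rllib.algorithms.algorithm import Algorithm

checkpoint_dir = "/tmp/ppo_cartpole_checkpoint"  # hypothetical path

# Restoring goes through `load_checkpoint()`; with this PR, the restored
# Learner weights are also synced to all training EnvRunners.
algo = Algorithm.from_checkpoint(checkpoint_dir)

# Continue training right away; EnvRunners sample with the restored weights.
result = algo.train()
print(result)
```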
11 changes: 11 additions & 0 deletions rllib/algorithms/algorithm_config.py
@@ -4064,6 +4064,17 @@ def _validate_new_api_stack_settings(self):
# TODO (sven): Once everything is on the new API stack, we won't need this method
# anymore.
def _validate_to_be_deprecated_settings(self):
# Env task fn will be deprecated.
if self._enable_new_api_stack and self.env_task_fn is not None:
deprecation_warning(
old="AlgorithmConfig.env_task_fn",
help="The `env_task_fn` API is not supported on the new API stack! "
"Curriculum learning should instead be implemented solely via "
"custom callbacks. Check out our curriculum learning example "
"script for more information: "
"https://github.com/ray-project/ray/blob/master/rllib/examples/curriculum/curriculum_learning.py", # noqa
)

if self.preprocessor_pref not in ["rllib", "deepmind", None]:
raise ValueError(
"`config.preprocessor_pref` must be either 'rllib', 'deepmind' or None!"
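For context on the deprecation message above, here is a hedged sketch of curriculum learning via a custom callback instead of `env_task_fn`. It assumes a custom environment exposing a hypothetical `set_task()` method, uses the old-stack `foreach_env()` helper for brevity, and picks an arbitrary reward threshold; the linked curriculum_learning.py example is the authoritative reference.

```python
# Sketch only -- assumes a custom env with a hypothetical `set_task()` method.
from ray.rllib.algorithms.callbacks import DefaultCallbacks


class CurriculumCallback(DefaultCallbacks):
    def on_train_result(self, *, algorithm, result, **kwargs):
        # Bump the task difficulty once the mean return passes a threshold
        # (threshold and task id are illustrative).
        if result["episode_reward_mean"] > 200.0:
            algorithm.workers.foreach_worker(
                lambda worker: worker.foreach_env(lambda env: env.set_task(2))
            )


# Wire the callback into the config (replacing any `env_task_fn` usage):
# config = config.callbacks(CurriculumCallback)
```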
51 changes: 48 additions & 3 deletions rllib/examples/evaluation/custom_evaluation.py
@@ -16,6 +16,51 @@
- It runs a defined number of episodes for evaluation purposes.
- It collects the metrics from those runs, summarizes these metrics and returns them.


How to run this script
----------------------
`python [script file name].py --enable-new-api-stack`

You can switch off custom evaluation (and use RLlib's default evaluation procedure)
with the `--no-custom-eval` flag.

You can switch on parallel evaluation to training using the
`--evaluation-parallel-to-training` flag. See this example script here:
https://github.com/ray-project/ray/blob/master/rllib/examples/evaluation/evaluation_parallel_to_training.py # noqa
for more details on running evaluation parallel to training.

For debugging, use the following additional command line options
`--no-tune --num-env-runners=0`
which should allow you to set breakpoints anywhere in the RLlib code and
have the execution stop there for inspection and debugging.

For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`


Results to expect
-----------------
You should see the following (or very similar) console output when running this script.
Note that for each iteration, due to the definition of our custom evaluation function,
we run 3 evaluation rounds per single training round.

...
Training iteration 1 -> evaluation round 0
Training iteration 1 -> evaluation round 1
Training iteration 1 -> evaluation round 2
...
...
+--------------------------------+------------+-----------------+--------+
| Trial name | status | loc | iter |
|--------------------------------+------------+-----------------+--------+
| PPO_SimpleCorridor_06582_00000 | TERMINATED | 127.0.0.1:69905 | 4 |
+--------------------------------+------------+-----------------+--------+
+------------------+-------+----------+--------------------+
| total time (s) | ts | reward | episode_len_mean |
|------------------+-------+----------+--------------------|
| 26.1973 | 16000 | 0.872034 | 13.7966 |
+------------------+-------+----------+--------------------+
"""
from ray.rllib.algorithms.algorithm import Algorithm
from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
@@ -69,10 +114,10 @@ def custom_eval_function(algorithm: Algorithm, eval_workers: WorkerSet) -> Resul
# processing.
rollout_metrics = []

# For demonstration purposes, run through some arbitrary number of evaluation
# round within this one call. Note that this function is called once per
# For demonstration purposes, run through some number of evaluation
# rounds within this one call. Note that this function is called once per
# training iteration (`Algorithm.train()` call) OR once per `Algorithm.evaluate()`
# (which may be called manually by the user).
# (which can be called manually by the user).
for i in range(3):
print(f"Training iteration {algorithm.iteration} -> evaluation round {i}")
# Sample episodes from the EnvRunners AND have them return only the thus
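As a complement to the diff above, this is a hedged sketch of how a custom evaluation function gets wired into a config via `AlgorithmConfig.evaluation(custom_evaluation_function=...)`. The toy evaluation function below is a stand-in for the example's `custom_eval_function`, and the environment and worker counts are illustrative.

```python
# Hedged wiring sketch -- not code from this PR.
from ray.rllib.algorithms.ppo import PPOConfig


def my_custom_eval(algorithm, eval_workers):
    # Toy stand-in for the example's `custom_eval_function`: sample once on
    # each eval worker and return a (here: empty) metrics dict.
    eval_workers.foreach_worker(lambda worker: worker.sample())
    return {}


config = (
    PPOConfig()
    .environment("CartPole-v1")
    .evaluation(
        custom_evaluation_function=my_custom_eval,
        evaluation_interval=1,
        evaluation_num_workers=2,
    )
)
algo = config.build()
# `evaluate()` now runs the custom function instead of the default procedure.
print(algo.evaluate())
```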
103 changes: 78 additions & 25 deletions rllib/examples/evaluation/evaluation_parallel_to_training.py
@@ -1,6 +1,72 @@
"""Example showing how one can set up evaluation running in parallel to training.

Such a setup saves a considerable amount of time during RL Algorithm training, because
the next training step does NOT have to wait for the previous evaluation procedure to
finish, but can already start running (in parallel).

See RLlib's documentation for more details on the effect of the different supported
evaluation configuration options:
https://docs.ray.io/en/latest/rllib/rllib-advanced-api.html#customized-evaluation-during-training # noqa

For an example of how to write a fully customized evaluation function (which normally
is not necessary as the config options are sufficient and offer maximum flexibility),
see this example script here:

https://github.com/ray-project/ray/blob/master/rllib/examples/evaluation/custom_evaluation.py # noqa


How to run this script
----------------------
`python [script file name].py --enable-new-api-stack`

Use the `--evaluation-num-workers` option to scale up the evaluation workers. Note
that the requested evaluation duration (`--evaluation-duration` measured in
`--evaluation-duration-unit`, which is either "timesteps" (default) or "episodes") is
shared between all configured evaluation workers. For example, if the evaluation
duration is 10 and the unit is "episodes" and you configured 5 workers, then each of the
evaluation workers will run exactly 2 episodes.

For debugging, use the following additional command line options
`--no-tune --num-env-runners=0`
which should allow you to set breakpoints anywhere in the RLlib code and
have the execution stop there for inspection and debugging.

For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`


Results to expect
-----------------
You should see the following output (at the end of the experiment) in your console when
running with a fixed number of 100k training timesteps
(`--enable-new-api-stack --evaluation-duration=auto --stop-timesteps=100000
--stop-reward=100000`):
+-----------------------------+------------+-----------------+--------+
| Trial name | status | loc | iter |
|-----------------------------+------------+-----------------+--------+
| PPO_CartPole-v1_1377a_00000 | TERMINATED | 127.0.0.1:73330 | 25 |
+-----------------------------+------------+-----------------+--------+
+------------------+--------+----------+--------------------+
| total time (s) | ts | reward | episode_len_mean |
|------------------+--------+----------+--------------------|
| 71.7485 | 100000 | 476.51 | 476.51 |
+------------------+--------+----------+--------------------+

When running without parallel evaluation (`--evaluation-not-parallel-to-training` flag),

the experiment takes considerably longer (~80sec vs ~70sec):
+-----------------------------+------------+-----------------+--------+
| Trial name | status | loc | iter |
|-----------------------------+------------+-----------------+--------+
| PPO_CartPole-v1_f1788_00000 | TERMINATED | 127.0.0.1:75135 | 25 |
+-----------------------------+------------+-----------------+--------+
+------------------+--------+----------+--------------------+
| total time (s) | ts | reward | episode_len_mean |
|------------------+--------+----------+--------------------|
| 81.7371 | 100000 | 494.68 | 494.68 |
+------------------+--------+----------+--------------------+
"""
from ray.rllib.algorithms.callbacks import DefaultCallbacks
from ray.rllib.env.multi_agent_env_runner import MultiAgentEnvRunner
from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner
from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
from ray.rllib.utils.test_utils import (
add_rllib_example_script_args,
@@ -114,13 +180,10 @@ def on_train_result(self, *, algorithm, result, **kwargs):
lambda _: MultiAgentCartPole(config={"num_agents": args.num_agents}),
)

config = (
base_config = (
get_trainable_cls(args.algo)
.get_default_config()
.experimental(_enable_new_api_stack=args.enable_new_api_stack)
.environment("env" if args.num_agents > 0 else "CartPole-v1")
# Run with tracing enabled for tf2.
.framework(args.framework)
# Use a custom callback that asserts that we are running the
# configured exact number of episodes per evaluation OR - in auto
# mode - run at least as many episodes as we have eval workers.
@@ -148,28 +211,11 @@ def on_train_result(self, *, algorithm, result, **kwargs):
# Switch off exploratory behavior for better (greedy) results.
evaluation_config={"explore": False},
)
.rollouts(
num_rollout_workers=args.num_env_runners,
# Set up the correct env-runner to use depending on
# old-stack/new-stack and multi-agent settings.
env_runner_cls=(
None
if not args.enable_new_api_stack
else SingleAgentEnvRunner
if args.num_agents == 0
else MultiAgentEnvRunner
),
)
.resources(
num_learner_workers=args.num_gpus,
num_gpus_per_learner_worker=int(args.num_gpus != 0),
num_cpus_for_local_worker=1,
)
)

# Add a simple multi-agent setup.
if args.num_agents > 0:
config.multi_agent(
base_config.multi_agent(
policies={f"p{i}" for i in range(args.num_agents)},
policy_mapping_fn=lambda aid, *a, **kw: f"p{aid}",
)
@@ -180,4 +226,11 @@ def on_train_result(self, *, algorithm, result, **kwargs):
"timesteps_total": args.stop_timesteps,
}

run_rllib_example_script_experiment(config, args, stop=stop)
run_rllib_example_script_experiment(
base_config,
args,
stop=stop,
success_metric={
"evaluation/sampler_results/episode_reward_mean": args.stop_reward,
},
)
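For reference, here is a hedged sketch of the evaluation settings that the timing comparison above hinges on; the algorithm, environment, and numbers are illustrative and not taken from this script.

```python
# Illustrative settings only -- see the example script for the real config.
from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("CartPole-v1")
    .evaluation(
        # Run evaluation concurrently with the next training step.
        evaluation_parallel_to_training=True,
        evaluation_interval=1,
        evaluation_num_workers=2,
        # "auto": evaluate for as long as the parallel training step takes;
        # a fixed duration (e.g. 10) is also possible.
        evaluation_duration="auto",
        evaluation_duration_unit="timesteps",
    )
)
```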