
[RLlib] AlgorithmConfig cleanup 03: Cleaner names and structuring of API-stack config settings. #44920

2 changes: 1 addition & 1 deletion doc/source/rllib/doc_code/catalog_guide.py
@@ -113,7 +113,7 @@ def __init__(self, *args, **kwargs):

config = (
PPOConfig()
.experimental(_enable_new_api_stack=True)
.api_stack(enable_rl_module_and_learner=True)
Collaborator:
Oh yes, I feel the new power rising! :D

Contributor Author:
Yeah, me, too. EnvRunners growing to become adults :D

.environment("CartPole-v1")
.framework("torch")
)
61 changes: 27 additions & 34 deletions doc/source/rllib/doc_code/new_api_stack.py
@@ -1,22 +1,19 @@
# __enabling-new-api-stack-sa-ppo-begin__

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner


config = (
PPOConfig().environment("CartPole-v1")
# Switch the new API stack flag to True (False by default).
# This enables the use of the RLModule (replaces ModelV2) AND Learner (replaces
# Policy) classes.
.experimental(_enable_new_api_stack=True)
# However, the above flag only activates the RLModule and Learner APIs. In order
# to utilize all of the new API stack's classes, you also have to specify the
# EnvRunner (replaces RolloutWorker) to use.
# Note that this step will be fully automated in the next release.
# Set the `env_runner_cls` to `SingleAgentEnvRunner` for single-agent setups and
# `MultiAgentEnvRunner` for multi-agent cases.
.env_runners(env_runner_cls=SingleAgentEnvRunner)
# Switch both new API stack flags to True (both are False by default).
# This a) enables the use of RLModule (replaces ModelV2) and Learner
# (replaces Policy), and b) automatically picks the correct EnvRunner
# (single-agent vs multi-agent) and activates ConnectorV2 support.
Collaborator:
So, if a user overrides the SingleAgentEnvRunner, can she still pass it into Algorithm.env_runners?

Contributor Author:
Yes, you can bring your own EnvRunner sub-class as you like.
See e.g. rllib.algorithms.tests.test_worker_failures, which uses sub-classes of SingleAgentEnvRunner and MultiAgentEnvRunner.
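For illustration, here is a minimal, untested sketch of that pattern (the subclass name MyEnvRunner and its counter attribute are made up for this example; the config calls mirror the ones in this diff):

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner


class MyEnvRunner(SingleAgentEnvRunner):
    """Custom EnvRunner that counts how often `sample()` gets called."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_sample_calls = 0

    def sample(self, *args, **kwargs):
        self.num_sample_calls += 1
        return super().sample(*args, **kwargs)


config = (
    PPOConfig()
    .environment("CartPole-v1")
    .api_stack(
        enable_rl_module_and_learner=True,
        enable_env_runner_and_connector_v2=True,
    )
    # Explicitly plug in the custom EnvRunner (overriding the automatic choice).
    .env_runners(env_runner_cls=MyEnvRunner)
)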

.api_stack(
enable_rl_module_and_learner=True,
enable_env_runner_and_connector_v2=True,
)
# We are using a simple 1-CPU setup here for learning. However, as the new stack
# supports arbitrary scaling on the learner axis, feel free to set
# `num_learner_workers` to the number of available GPUs for multi-GPU training (and
@@ -43,25 +40,22 @@
# __enabling-new-api-stack-ma-ppo-begin__

from ray.rllib.algorithms.ppo import PPOConfig # noqa
from ray.rllib.env.multi_agent_env_runner import MultiAgentEnvRunner # noqa
from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole # noqa


# A typical multi-agent setup (otherwise using the exact same parameters as before)
# looks like this.
config = (
PPOConfig().environment(MultiAgentCartPole, env_config={"num_agents": 2})
# Switch the new API stack flag to True (False by default).
# This enables the use of the RLModule (replaces ModelV2) AND Learner (replaces
# Policy) classes.
.experimental(_enable_new_api_stack=True)
# However, the above flag only activates the RLModule and Learner APIs. In order
# to utilize all of the new API stack's classes, you also have to specify the
# EnvRunner (replaces RolloutWorker) to use.
# Note that this step will be fully automated in the next release.
# Set the `env_runner_cls` to `SingleAgentEnvRunner` for single-agent setups and
# `MultiAgentEnvRunner` for multi-agent cases.
.env_runners(env_runner_cls=MultiAgentEnvRunner)
# Switch both new API stack flags to True (both are False by default).
# This a) enables the use of RLModule (replaces ModelV2) and Learner
# (replaces Policy), and b) automatically picks the correct EnvRunner
# (single-agent vs multi-agent) and activates ConnectorV2 support.
.api_stack(
enable_rl_module_and_learner=True,
enable_env_runner_and_connector_v2=True,
)
# We are using a simple 1-CPU setup here for learning. However, as the new stack
# supports arbitrary scaling on the learner axis, feel free to set
# `num_learner_workers` to the number of available GPUs for multi-GPU training (and
@@ -95,20 +89,19 @@
# __enabling-new-api-stack-sa-sac-begin__

from ray.rllib.algorithms.sac import SACConfig # noqa
from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner # noqa


config = (
SACConfig().environment("Pendulum-v1")
# Switch the new API stack flag to True (False by default).
# This enables the use of the RLModule (replaces ModelV2) AND Learner (replaces
# Policy) classes.
.experimental(_enable_new_api_stack=True)
# However, the above flag only activates the RLModule and Learner APIs. In order
# to utilize all of the new API stack's classes, you also have to specify the
# EnvRunner (replaces RolloutWorker) to use.
# Note that this step will be fully automated in the next release.
.env_runners(env_runner_cls=SingleAgentEnvRunner)
# Switch both new API stack flags to True (both are False by default).
# This a) enables the use of RLModule (replaces ModelV2) and Learner
# (replaces Policy), and b) automatically picks the correct EnvRunner
# (single-agent vs multi-agent) and activates ConnectorV2 support.
.api_stack(
enable_rl_module_and_learner=True,
enable_env_runner_and_connector_v2=True,
)
# We are using a simple 1-CPU setup here for learning. However, as the new stack
# supports arbitrary scaling on the learner axis, feel free to set
# `num_learner_workers` to the number of available GPUs for multi-GPU training (and
8 changes: 4 additions & 4 deletions doc/source/rllib/doc_code/rlmodule_guide.py
@@ -12,7 +12,7 @@

config = (
PPOConfig()
.experimental(_enable_new_api_stack=True)
.api_stack(enable_rl_module_and_learner=True)
.framework("torch")
.environment("CartPole-v1")
)
@@ -80,7 +80,7 @@

config = (
BCConfigTest()
.experimental(_enable_new_api_stack=True)
.api_stack(enable_rl_module_and_learner=True)
.environment("CartPole-v1")
.rl_module(
model_config_dict={"fcnet_hiddens": [32, 32]},
@@ -103,7 +103,7 @@

config = (
BCConfigTest()
.experimental(_enable_new_api_stack=True)
.api_stack(enable_rl_module_and_learner=True)
.environment(MultiAgentCartPole, env_config={"num_agents": 2})
.rl_module(
model_config_dict={"fcnet_hiddens": [32, 32]},
@@ -406,7 +406,7 @@ def setup(self):
config = (
PPOConfig()
# Enable the new API stack (RLModule and Learner APIs).
.experimental(_enable_new_api_stack=True).environment("CartPole-v1")
.api_stack(enable_rl_module_and_learner=True).environment("CartPole-v1")
)
env = gym.make("CartPole-v1")
# Create an RL Module that we would like to checkpoint
4 changes: 2 additions & 2 deletions doc/source/rllib/rllib-learner.rst
@@ -58,7 +58,7 @@ arguments in the :py:class:`~ray.rllib.algorithms.algorithm_config.AlgorithmConf

config = (
PPOConfig()
.experimental(_enable_new_api_stack=True)
.api_stack(enable_rl_module_and_learner=True)
.resources(
num_gpus_per_learner_worker=0, # Set this to 1 to enable GPU training.
num_cpus_per_learner_worker=1,
@@ -77,7 +77,7 @@
.. note::

This feature is in alpha. If you migrate to this algorithm, enable the feature
via `AlgorithmConfig.experimental(_enable_new_api_stack=True)`.
via `AlgorithmConfig.api_stack(enable_rl_module_and_learner=True)`.

The following algorithms support :py:class:`~ray.rllib.core.learner.learner.Learner` out of the box. Implement
an algorithm with a custom :py:class:`~ray.rllib.core.learner.learner.Learner` to leverage this API for other algorithms.
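As a rough sketch of how the renamed flag combines with the learner-scaling settings shown above (the worker count of 2 is purely illustrative, not a recommendation):

from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("CartPole-v1")
    # Activate the RLModule and Learner APIs (the renamed flag from this PR).
    .api_stack(enable_rl_module_and_learner=True)
    .resources(
        # Scale the Learner axis, e.g. one Learner worker per available GPU.
        num_learner_workers=2,
        num_gpus_per_learner_worker=0,  # Set this to 1 to train on GPUs.
        num_cpus_per_learner_worker=1,
    )
)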
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-rlmodule.rst
@@ -64,7 +64,7 @@ RL Module is a neural network container that implements three public methods: :p
Enabling RL Modules in the Configuration
----------------------------------------

Enable RL Modules via our configuration object: ``AlgorithmConfig.experimental(_enable_new_api_stack=True)``.
Enable RL Modules via our configuration object: ``AlgorithmConfig.api_stack(enable_rl_module_and_learner=True)``.

.. literalinclude:: doc_code/rlmodule_guide.py
:language: python
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-training.rst
@@ -220,7 +220,7 @@ These functions return values for each worker as a list.
You can also access just the "master" copy of the algorithm state through
``Algorithm.get_policy()`` or ``Algorithm.workers.local_worker()``,
but note that updates here may not be immediately reflected in
your rollout workers (if you have configured ``num_rollout_workers > 0``).
your rollout workers (if you have configured ``num_env_runners > 0``).
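As a rough, hedged sketch of that caveat (the fully tested example lives in the doc_code file included below):

from ray.rllib.algorithms.ppo import PPOConfig

algo = (
    PPOConfig()
    .environment("CartPole-v1")
    .env_runners(num_env_runners=2)
    .build()
)
algo.train()

# Read (or modify) the "master" copy of the model state on the local worker.
master_weights = algo.get_policy().get_weights()

# Changes made locally are not automatically visible on the remote workers;
# push them out explicitly when needed.
algo.workers.sync_weights()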
Here's a quick example of how to access state of a model:

.. literalinclude:: ./doc_code/getting_started.py