[RLlib] Update autoregressive actions example. (ray-project#47829)

Signed-off-by: ujjawal-khare <[email protected]>
ujjawal-khare-27 · Oct 15, 2024 · 9b55915 · 9b55915
1 parent 4260777
commit 9b55915
Show file tree

Hide file tree

Showing 3 changed files with 60 additions and 6 deletions.
diff --git a/rllib/BUILD b/rllib/BUILD
@@ -3263,6 +3263,56 @@ py_test(
     args = ["--as-test", "--framework=torch", "--stop-reward=-0.012", "--num-cpus=4"]
 )
 
+#@OldAPIStack
+py_test(
+    name = "examples/cartpole_lstm_impala_tf2",
+    main = "examples/cartpole_lstm.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "medium",
+    srcs = ["examples/cartpole_lstm.py"],
+    args = ["--run=IMPALA", "--as-test", "--framework=tf2", "--stop-reward=28", "--num-cpus=4"]
+)
+
+#@OldAPIStack
+py_test(
+    name = "examples/cartpole_lstm_impala_torch",
+    main = "examples/cartpole_lstm.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "medium",
+    srcs = ["examples/cartpole_lstm.py"],
+    args = ["--run=IMPALA", "--as-test", "--framework=torch", "--stop-reward=28", "--num-cpus=4"]
+)
+
+#@OldAPIStack
+py_test(
+    name = "examples/cartpole_lstm_ppo_tf2",
+    main = "examples/cartpole_lstm.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "large",
+    srcs = ["examples/cartpole_lstm.py"],
+    args = ["--run=PPO", "--as-test", "--framework=tf2", "--stop-reward=28", "--num-cpus=4"]
+)
+
+#@OldAPIStack
+py_test(
+    name = "examples/cartpole_lstm_ppo_torch",
+    main = "examples/cartpole_lstm.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "medium",
+    srcs = ["examples/cartpole_lstm.py"],
+    args = ["--run=PPO", "--as-test", "--framework=torch", "--stop-reward=28", "--num-cpus=4"]
+)
+
+#@OldAPIStack
+py_test(
+    name = "examples/cartpole_lstm_ppo_torch_with_prev_a_and_r",
+    main = "examples/cartpole_lstm.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "medium",
+    srcs = ["examples/cartpole_lstm.py"],
+    args = ["--run=PPO", "--as-test", "--framework=torch", "--stop-reward=28", "--num-cpus=4", "--use-prev-action",  "--use-prev-reward"]
+)
+
 #@OldAPIStack
 py_test(
     name = "examples/centralized_critic_tf",

diff --git a/rllib/examples/autoregressive_action_dist.py b/rllib/examples/autoregressive_action_dist.py
@@ -147,7 +147,9 @@ def get_cli_args():
     config = (
         get_trainable_cls(args.run)
         .get_default_config()
-        .environment(CorrelatedActionsEnv)
+        # Batch-norm models have not been migrated to the RL Module API yet.
+        .api_stack(enable_rl_module_and_learner=False)
+        .environment(AutoRegressiveActionEnv)
         .framework(args.framework)
         .training(gamma=0.5)
         # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.

diff --git a/rllib/examples/rl_modules/classes/autoregressive_actions_rlm.py b/rllib/examples/rl_modules/classes/autoregressive_actions_rlm.py
@@ -1,10 +1,11 @@
 import abc
 from abc import abstractmethod
-from typing import Any, Dict
+from typing import Dict
 
 from ray.rllib.core import Columns
 from ray.rllib.core.models.base import ENCODER_OUT
 from ray.rllib.core.models.configs import MLPHeadConfig
+from ray.rllib.core.models.specs.specs_dict import SpecDict
 from ray.rllib.core.rl_module.apis.value_function_api import ValueFunctionAPI
 from ray.rllib.core.rl_module.rl_module import RLModule
 from ray.rllib.core.rl_module.torch.torch_rl_module import TorchRLModule
@@ -233,10 +234,11 @@ def _forward_train(self, batch: Dict[str, TensorType]) -> Dict[str, TensorType]:
         return outs
 
     @override(ValueFunctionAPI)
-    def compute_values(self, batch: Dict[str, TensorType], embeddings=None):
-        # Encoder forward pass to get `embeddings`, if necessary.
-        if embeddings is None:
-            embeddings = self.encoder(batch)[ENCODER_OUT]
+    def compute_values(self, batch: Dict[str, TensorType]):
+
+        # Encoder forward pass.
+        encoder_outs = self.encoder(batch)[ENCODER_OUT]
+
         # Value head forward pass.
         vf_out = self.vf(embeddings)
         # Squeeze out last dimension (single node value head).