From c68373d4476995d2f61009a83bd17b87bd3ee8bc Mon Sep 17 00:00:00 2001
From: sven1977 <svenmika1977@gmail.com>
Date: Mon, 16 Sep 2024 17:58:01 +0200
Subject: [PATCH 1/2] Update many_ppo config keys, remove crashing-APPO BUILD tests, allow WandB key from env var

Signed-off-by: sven1977 <svenmika1977@gmail.com>
---
 .../long_running_tests/workloads/many_ppo.py  |  6 +--
 rllib/BUILD                                   | 46 -------------------
 rllib/tuned_examples/ppo/atari_ppo.py         |  2 +-
 rllib/utils/test_utils.py                     |  9 ++--
 4 files changed, 10 insertions(+), 53 deletions(-)

diff --git a/release/long_running_tests/workloads/many_ppo.py b/release/long_running_tests/workloads/many_ppo.py
index 2238f7bf0db0..09ea444fce55 100644
--- a/release/long_running_tests/workloads/many_ppo.py
+++ b/release/long_running_tests/workloads/many_ppo.py
@@ -2,6 +2,7 @@
 # This covers https://github.com/ray-project/ray/pull/12148
 
 import ray
+from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME
 from ray.tune import run_experiments
 from ray.tune.utils.release_test_util import ProgressCallback
 from ray._private.test_utils import monitor_memory_usage
@@ -34,13 +35,12 @@
             "env": "CartPole-v0",
             "num_samples": 10000,
             "config": {
-                "framework": "torch",
-                "num_workers": 7,
+                "num_env_runners": 7,
                 "num_gpus": 0,
                 "num_sgd_iter": 1,
             },
             "stop": {
-                "timesteps_total": 1,
+                NUM_ENV_STEPS_SAMPLED_LIFETIME: 1,
             },
         }
     },
diff --git a/rllib/BUILD b/rllib/BUILD
index ec74fa931472..5cba1f4607f9 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -319,52 +319,6 @@ py_test(
     args = ["--dir=tuned_examples/appo"]
 )
 
-# Tests against crashing or hanging environments.
-# Single-agent: Crash only.
-#@OldAPIStack
-py_test(
-    name = "learning_tests_cartpole_crashing_appo_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete", "crashing_cartpole", "torch_only"],
-    size = "large",
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py"],
-    args = ["--dir=tuned_examples/appo", "--num-cpus=6"]
-)
-# Single-agent: Crash and stall.
-#@OldAPIStack
-py_test(
-    name = "learning_tests_cartpole_crashing_and_stalling_appo_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete", "crashing_cartpole", "torch_only"],
-    size = "large",
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py"],
-    args = ["--dir=tuned_examples/appo", "--num-cpus=6"]
-)
-# Multi-agent: Crash only.
-#@OldAPIStack
-py_test(
-    name = "learning_tests_multi_agent_cartpole_crashing_appo_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete", "crashing_cartpole", "torch_only"],
-    size = "large",
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py"],
-    args = ["--dir=tuned_examples/appo", "--num-cpus=6"]
-)
-# Multi-agent: Crash and stall.
-#@OldAPIStack
-py_test(
-    name = "learning_tests_multi_agent_cartpole_crashing_and_stalling_appo_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete", "crashing_cartpole", "torch_only"],
-    size = "large",
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py"],
-    args = ["--dir=tuned_examples/appo", "--num-cpus=6"]
-)
-
 # BC
 # CartPole
 py_test(
diff --git a/rllib/tuned_examples/ppo/atari_ppo.py b/rllib/tuned_examples/ppo/atari_ppo.py
index 5f06866894a6..17cdd09945d2 100644
--- a/rllib/tuned_examples/ppo/atari_ppo.py
+++ b/rllib/tuned_examples/ppo/atari_ppo.py
@@ -31,7 +31,7 @@ def _make_learner_connector(input_observation_space, input_action_space):
 # We would like our frame stacking connector to do this job.
 def _env_creator(cfg):
     return wrap_atari_for_new_api_stack(
-        gym.make(args.env, **cfg),
+        gym.make(args.env, **cfg, render_mode="rgb_array"),
         # Perform frame-stacking through ConnectorV2 API.
         framestack=None,
     )
diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py
index 7ff7eff0fbc8..64b1356fa755 100644
--- a/rllib/utils/test_utils.py
+++ b/rllib/utils/test_utils.py
@@ -30,7 +30,7 @@
 import ray
 from ray import air, tune
 from ray.air.constants import TRAINING_ITERATION
-from ray.air.integrations.wandb import WandbLoggerCallback
+from ray.air.integrations.wandb import WandbLoggerCallback, WANDB_ENV_VAR
 from ray.rllib.common import SupportedFileType
 from ray.rllib.env.wrappers.atari_wrappers import is_atari, wrap_deepmind
 from ray.rllib.train import load_experiments_from_file
@@ -1462,13 +1462,16 @@ def run_rllib_example_script_experiment(
 
     # Log results using WandB.
     tune_callbacks = tune_callbacks or []
-    if hasattr(args, "wandb_key") and args.wandb_key is not None:
+    if hasattr(args, "wandb_key") and (
+        args.wandb_key is not None or os.environ[WANDB_ENV_VAR]
+    ):
+        wandb_key = args.wandb_key or os.environ[WANDB_ENV_VAR]
         project = args.wandb_project or (
             args.algo.lower() + "-" + re.sub("\\W+", "-", str(config.env).lower())
         )
         tune_callbacks.append(
             WandbLoggerCallback(
-                api_key=args.wandb_key,
+                api_key=wandb_key,
                 project=project,
                 upload_checkpoints=True,
                 **({"name": args.wandb_run_name} if args.wandb_run_name else {}),

From 117ebc502ecdf0156d46e8a19df8ba861c078b27 Mon Sep 17 00:00:00 2001
From: sven1977 <svenmika1977@gmail.com>
Date: Mon, 16 Sep 2024 19:24:39 +0200
Subject: [PATCH 2/2] Fix KeyError in WandB setup when the API-key env var is unset

Signed-off-by: sven1977 <svenmika1977@gmail.com>
---
 rllib/utils/test_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py
index 64b1356fa755..aa8ea1401aa7 100644
--- a/rllib/utils/test_utils.py
+++ b/rllib/utils/test_utils.py
@@ -1463,7 +1463,7 @@ def run_rllib_example_script_experiment(
     # Log results using WandB.
     tune_callbacks = tune_callbacks or []
     if hasattr(args, "wandb_key") and (
-        args.wandb_key is not None or os.environ[WANDB_ENV_VAR]
+        args.wandb_key is not None or WANDB_ENV_VAR in os.environ
     ):
         wandb_key = args.wandb_key or os.environ[WANDB_ENV_VAR]
         project = args.wandb_project or (