ray-project · sven1977 · Sep 16, 2024 · Sep 16, 2024 · Sep 16, 2024 · simonsays1980
diff --git a/release/long_running_tests/workloads/many_ppo.py b/release/long_running_tests/workloads/many_ppo.py
@@ -2,6 +2,7 @@
 # This covers https://github.com/ray-project/ray/pull/12148
 
 import ray
+from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME
 from ray.tune import run_experiments
 from ray.tune.utils.release_test_util import ProgressCallback
 from ray._private.test_utils import monitor_memory_usage
@@ -34,13 +35,12 @@
             "env": "CartPole-v0",
             "num_samples": 10000,
             "config": {
-                "framework": "torch",
-                "num_workers": 7,
+                "num_env_runners": 7,
                 "num_gpus": 0,
                 "num_sgd_iter": 1,
             },
             "stop": {
-                "timesteps_total": 1,
+                NUM_ENV_STEPS_SAMPLED_LIFETIME: 1,
             },
         }
     },

@@ -319,52 +319,6 @@ py_test(
     args = ["--dir=tuned_examples/appo"]
 )
 
-# Tests against crashing or hanging environments.
-# Single-agent: Crash only.
-#@OldAPIStack
-py_test(
-    name = "learning_tests_cartpole_crashing_appo_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete", "crashing_cartpole", "torch_only"],
-    size = "large",
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/appo/cartpole-crashing-recreate-workers-appo.py"],
-    args = ["--dir=tuned_examples/appo", "--num-cpus=6"]
-)
-# Single-agent: Crash and stall.
-#@OldAPIStack
-py_test(
-    name = "learning_tests_cartpole_crashing_and_stalling_appo_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete", "crashing_cartpole", "torch_only"],
-    size = "large",
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/appo/cartpole-crashing-and-stalling-recreate-workers-appo.py"],
-    args = ["--dir=tuned_examples/appo", "--num-cpus=6"]
-)
-# Multi-agent: Crash only.
-#@OldAPIStack
-py_test(
-    name = "learning_tests_multi_agent_cartpole_crashing_appo_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete", "crashing_cartpole", "torch_only"],
-    size = "large",
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/appo/multi-agent-cartpole-crashing-recreate-workers-appo.py"],
-    args = ["--dir=tuned_examples/appo", "--num-cpus=6"]
-)
-# Multi-agent: Crash and stall.
-#@OldAPIStack
-py_test(
-    name = "learning_tests_multi_agent_cartpole_crashing_and_stalling_appo_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete", "crashing_cartpole", "torch_only"],
-    size = "large",
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/appo/multi-agent-cartpole-crashing-and-stalling-recreate-workers-appo.py"],
-    args = ["--dir=tuned_examples/appo", "--num-cpus=6"]
-)
-
 # BC
 # CartPole
 py_test(

@@ -31,7 +31,7 @@ def _make_learner_connector(input_observation_space, input_action_space):
 # We would like our frame stacking connector to do this job.
 def _env_creator(cfg):
     return wrap_atari_for_new_api_stack(
-        gym.make(args.env, **cfg),
+        gym.make(args.env, **cfg, render_mode="rgb_array"),
         # Perform frame-stacking through ConnectorV2 API.
         framestack=None,
     )

@@ -30,7 +30,7 @@
 import ray
 from ray import air, tune
 from ray.air.constants import TRAINING_ITERATION
-from ray.air.integrations.wandb import WandbLoggerCallback
+from ray.air.integrations.wandb import WandbLoggerCallback, WANDB_ENV_VAR
 from ray.rllib.common import SupportedFileType
 from ray.rllib.env.wrappers.atari_wrappers import is_atari, wrap_deepmind
 from ray.rllib.train import load_experiments_from_file
@@ -1462,13 +1462,18 @@ def run_rllib_example_script_experiment(
 
     # Log results using WandB.
     tune_callbacks = tune_callbacks or []
-    if hasattr(args, "wandb_key") and args.wandb_key is not None:
+    # Prefer an explicit `args.wandb_key`; otherwise fall back to the WANDB_ENV_VAR
+    # environment variable, read via `.get()` so an unset variable cannot raise.
+    if hasattr(args, "wandb_key") and (
+        args.wandb_key is not None or os.environ.get(WANDB_ENV_VAR)
+    ):
+        wandb_key = args.wandb_key or os.environ.get(WANDB_ENV_VAR)
         project = args.wandb_project or (
             args.algo.lower() + "-" + re.sub("\\W+", "-", str(config.env).lower())
         )
         tune_callbacks.append(
             WandbLoggerCallback(
-                api_key=args.wandb_key,
+                api_key=wandb_key,
                 project=project,
                 upload_checkpoints=True,
                 **({"name": args.wandb_run_name} if args.wandb_run_name else {}),