From f44a6da0e036c0f46056b7e2f868ce3a868e6d99 Mon Sep 17 00:00:00 2001 From: sven1977 Date: Tue, 17 Sep 2024 13:16:57 +0200 Subject: [PATCH 1/2] wip Signed-off-by: sven1977 --- rllib/env/multi_agent_env_runner.py | 10 ++++++++-- rllib/env/single_agent_env_runner.py | 10 ++++++++-- rllib/utils/test_utils.py | 13 ++++++++----- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py index b6cf7eea5562..647073a96ca3 100644 --- a/rllib/env/multi_agent_env_runner.py +++ b/rllib/env/multi_agent_env_runner.py @@ -828,16 +828,22 @@ def make_env(self): remote=self.config.remote_worker_envs, ) + # No env provided -> Error. + if not self.config.env: + raise ValueError( + "`config.env` is not provided! You should provide a valid environment " + "to your config through `config.environment([env descriptor e.g. " + "'CartPole-v1'])`." + ) # Register env for the local context. # Note, `gym.register` has to be called on each worker. - if isinstance(self.config.env, str) and _global_registry.contains( + elif isinstance(self.config.env, str) and _global_registry.contains( ENV_CREATOR, self.config.env ): entry_point = partial( _global_registry.get(ENV_CREATOR, self.config.env), env_ctx, ) - else: entry_point = partial( _gym_env_creator, diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index 73c8c417f7a1..857e7ab6fc58 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -794,16 +794,22 @@ def make_env(self) -> None: remote=self.config.remote_worker_envs, ) + # No env provided -> Error. + if not self.config.env: + raise ValueError( + "`config.env` is not provided! You should provide a valid environment " + "to your config through `config.environment([env descriptor e.g. " + "'CartPole-v1'])`." + ) # Register env for the local context. # Note, `gym.register` has to be called on each worker. 
- if isinstance(self.config.env, str) and _global_registry.contains( + elif isinstance(self.config.env, str) and _global_registry.contains( ENV_CREATOR, self.config.env ): entry_point = partial( _global_registry.get(ENV_CREATOR, self.config.env), env_ctx, ) - else: entry_point = partial( _gym_env_creator, diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py index aa8ea1401aa7..de2adbbe990c 100644 --- a/rllib/utils/test_utils.py +++ b/rllib/utils/test_utils.py @@ -1392,15 +1392,18 @@ def run_rllib_example_script_experiment( # Define compute resources used automatically (only using the --num-gpus arg). # New stack. if config.enable_rl_module_and_learner: + # Do we have GPUs available in the cluster? + num_gpus = ray.cluster_resources().get("GPU", 0) + if args.num_gpus > 0 and num_gpus < args.num_gpus: + raise ValueError( + f"You are running your script with --num-gpus={args.num_gpus}, " + "but your cluster only has {resources['GPU']} GPUs! Will " + ) # Define compute resources used. config.resources(num_gpus=0) config.learners( num_learners=args.num_gpus, - num_gpus_per_learner=( - 1 - if torch and torch.cuda.is_available() and args.num_gpus > 0 - else 0 - ), + num_gpus_per_learner=1 if num_gpus >= args.num_gpus > 0 else 0, ) config.resources(num_gpus=0) # Old stack. From 31bd6d1b7022a26948160f84ea00e08ca0d5119b Mon Sep 17 00:00:00 2001 From: sven1977 Date: Tue, 17 Sep 2024 14:14:39 +0200 Subject: [PATCH 2/2] wip Signed-off-by: sven1977 --- rllib/utils/test_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py index de2adbbe990c..b5b3d6604ab0 100644 --- a/rllib/utils/test_utils.py +++ b/rllib/utils/test_utils.py @@ -1395,9 +1395,10 @@ def run_rllib_example_script_experiment( # Do we have GPUs available in the cluster? 
num_gpus = ray.cluster_resources().get("GPU", 0) if args.num_gpus > 0 and num_gpus < args.num_gpus: - raise ValueError( + logger.warning( f"You are running your script with --num-gpus={args.num_gpus}, " - "but your cluster only has {resources['GPU']} GPUs! Will " + f"but your cluster only has {num_gpus} GPUs! Will run " + f"with {args.num_gpus} CPU Learners instead." ) # Define compute resources used. config.resources(num_gpus=0)