Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RLlib] New API stack: Add systematic IMPALA learning tests for [CartPole|Pendulum] | [CPU|GPU|multi-CPU|multi-GPU] | [single- and multi-agent]. #46162

Merged
merged 11 commits into from
Jun 22, 2024
60 changes: 57 additions & 3 deletions rllib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,62 @@ py_test(
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack"]
)
# New API stack: single-agent CartPole IMPALA on 1 GPU learner.
# The "gpu" tag routes this test to a GPU-equipped CI machine.
py_test(
name = "learning_tests_cartpole_impala_gpu",
main = "tuned_examples/impala/cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
size = "large",
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
)
# New API stack: single-agent CartPole IMPALA with 2 learner workers.
# NOTE(review): passes --num-gpus=2 but carries no "gpu"/"multi_gpu" tag, so it
# runs on CPU-only CI machines — presumably the flag here just sets the number
# of learner workers (falling back to CPU); confirm against the script's arg
# parser before relying on this.
py_test(
name = "learning_tests_cartpole_impala_multi_cpu",
main = "tuned_examples/impala/cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# New API stack: single-agent CartPole IMPALA on 2 GPU learners.
# The "multi_gpu" tag routes this test to a multi-GPU CI machine.
py_test(
name = "learning_tests_cartpole_impala_multi_gpu",
main = "tuned_examples/impala/cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
)
# New API stack: multi-agent (2 agents) CartPole IMPALA, CPU-only.
py_test(
    name = "learning_tests_multi_agent_cartpole_impala",
    main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "torch_only"],
    size = "large",
    srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
    # Fixed: the trailing "-stop-reward=" (single dash, empty value) would make
    # the script's argparse error out at startup; dropped so the script's own
    # default stop criteria apply, matching the sibling multi-agent tests below.
    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2"]
)
# New API stack: multi-agent (2 agents) CartPole IMPALA on 1 GPU learner.
# The "gpu" tag routes this test to a GPU-equipped CI machine.
py_test(
name = "learning_tests_multi_agent_cartpole_impala_gpu",
main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
size = "large",
srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=1"]
)
# New API stack: multi-agent (2 agents) CartPole IMPALA with 2 learner workers.
# NOTE(review): --num-gpus=2 without a "gpu"/"multi_gpu" tag — presumably runs
# the 2 learners on CPU-only CI machines; confirm the flag's semantics.
py_test(
name = "learning_tests_multi_agent_cartpole_impala_multi_cpu",
main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
size = "large",
srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2"]
)
# New API stack: multi-agent (2 agents) CartPole IMPALA on 2 GPU learners.
# The "multi_gpu" tag routes this test to a multi-GPU CI machine.
py_test(
name = "learning_tests_multi_agent_cartpole_impala_multi_gpu",
main = "tuned_examples/impala/multi_agent_cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_cartpole", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
size = "large",
srcs = ["tuned_examples/impala/multi_agent_cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2"]
)

#@OldAPIstack
py_test(
Expand All @@ -346,18 +402,16 @@ py_test(
],
args = ["--dir=tuned_examples/impala"]
)

#@OldAPIStack
# Old API stack: regression test driven through run_regression_tests.py; the
# actual tuned example now lives in the *_old_api_stack.py file (the original
# multi_agent_cartpole_impala.py was converted to the new API stack).
py_test(
name = "learning_tests_multi_agent_cartpole_impala_old_api_stack",
main = "tests/run_regression_tests.py",
tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"],
size = "medium",
srcs = ["tests/run_regression_tests.py"],
data = ["tuned_examples/impala/multi_agent_cartpole_impala_old_api_stack.py"],
args = ["--dir=tuned_examples/impala"]
)

#@OldAPIStack
py_test(
name = "learning_tests_cartpole_impala_fake_gpus_old_api_stack",
Expand Down
57 changes: 36 additions & 21 deletions rllib/tuned_examples/impala/multi_agent_cartpole_impala.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,58 @@
# @OldAPIStack
from ray.rllib.algorithms.impala import ImpalaConfig
from ray.rllib.connectors.env_to_module.mean_std_filter import MeanStdFilter
from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
from ray.rllib.utils.metrics import (
ENV_RUNNER_RESULTS,
EPISODE_RETURN_MEAN,
NUM_ENV_STEPS_SAMPLED_LIFETIME,
)
from ray import tune
from ray.rllib.utils.test_utils import add_rllib_example_script_args
from ray.tune.registry import register_env

tune.registry.register_env("env", lambda cfg: MultiAgentCartPole(config=cfg))
parser = add_rllib_example_script_args()
# Use `parser` to add your own custom command line options to this script
# and (if needed) use their values to set up `config` below.
args = parser.parse_args()

register_env("env", lambda cfg: MultiAgentCartPole(config=cfg))


config = (
ImpalaConfig()
.environment("env", env_config={"num_agents": 4})
.env_runners(
num_envs_per_env_runner=5,
num_env_runners=4,
observation_filter="MeanStdFilter",
.api_stack(
enable_rl_module_and_learner=True,
enable_env_runner_and_connector_v2=True,
)
.resources(num_gpus=1, _fake_gpus=True)
.multi_agent(
policies=["p0", "p1", "p2", "p3"],
policy_mapping_fn=(lambda agent_id, episode, worker, **kwargs: f"p{agent_id}"),
.environment("env", env_config={"num_agents": 2})
.env_runners(
env_to_module_connector=lambda env: MeanStdFilter(multi_agent=True),
)
.training(
num_sgd_iter=1,
vf_loss_coeff=0.005,
vtrace=True,
model={
"fcnet_hiddens": [32],
"fcnet_activation": "linear",
train_batch_size_per_learner=500,
grad_clip=40.0,
grad_clip_by="global_norm",
lr=0.0005,
vf_loss_coeff=0.1,
)
.rl_module(
model_config_dict={
"vf_share_layers": True,
"uses_new_env_runners": True,
},
replay_proportion=0.0,
)
.multi_agent(
policies=["p0", "p1"],
policy_mapping_fn=(lambda agent_id, episode, **kwargs: f"p{agent_id}"),
)
)

stop = {
f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 600, # 600 / 4 (==num_agents) = 150
f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 200000,
f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 800.0,
NUM_ENV_STEPS_SAMPLED_LIFETIME: 400000,
}


if __name__ == "__main__":
from ray.rllib.utils.test_utils import run_rllib_example_script_experiment

run_rllib_example_script_experiment(config, args, stop=stop)

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# @OldAPIStack
from ray.rllib.algorithms.impala import ImpalaConfig
from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
from ray.rllib.utils.metrics import (
    ENV_RUNNER_RESULTS,
    EPISODE_RETURN_MEAN,
    NUM_ENV_STEPS_SAMPLED_LIFETIME,
)
from ray import tune


def _env_creator(cfg):
    """Env factory: a MultiAgentCartPole parameterized by the env_config."""
    return MultiAgentCartPole(config=cfg)


def _policy_mapping_fn(agent_id, episode, worker, **kwargs):
    """Map agent id `i` to its own dedicated policy `p{i}`."""
    return f"p{agent_id}"


tune.registry.register_env("env", _env_creator)

# Old-API-stack IMPALA config: 4-agent CartPole, one policy per agent,
# 4 env-runner workers with 5 vectorized envs each, trained on 1 (fake) GPU.
config = ImpalaConfig()
config = config.environment("env", env_config={"num_agents": 4})
config = config.env_runners(
    num_envs_per_env_runner=5,
    num_env_runners=4,
    observation_filter="MeanStdFilter",
)
config = config.resources(num_gpus=1, _fake_gpus=True)
config = config.multi_agent(
    policies=["p0", "p1", "p2", "p3"],
    policy_mapping_fn=_policy_mapping_fn,
)
config = config.training(
    num_sgd_iter=1,
    vf_loss_coeff=0.005,
    vtrace=True,
    model={
        "fcnet_hiddens": [32],
        "fcnet_activation": "linear",
        "vf_share_layers": True,
    },
    replay_proportion=0.0,
)

# Stop criteria: mean return summed over all agents, or a sampling budget.
stop = {
    f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 600,  # 600 / 4 (==num_agents) = 150
    f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 200000,
}
Loading