Skip to content

Commit

Permalink
[RLlib] IMPALA on new API stack (w/ EnvRunner- and ConnectorV2 APIs).
Browse files Browse the repository at this point in the history
  • Loading branch information
sven1977 authored Jun 19, 2024
1 parent 5e2fb2e commit 231a013
Show file tree
Hide file tree
Showing 62 changed files with 2,835 additions and 1,739 deletions.
137 changes: 76 additions & 61 deletions rllib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,6 @@ py_test(
# --------------------------------------------------------------------

# APPO
#@OldAPIStack
py_test(
name = "learning_tests_cartpole_appo_hybrid_api_stack",
main = "tests/run_regression_tests.py",
tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "no_tf_static_graph"],
size = "medium", # bazel may complain about it being too long sometimes - medium is on purpose as some frameworks take longer
srcs = ["tests/run_regression_tests.py"],
data = ["tuned_examples/appo/cartpole-appo-w-rl-modules-and-learner.yaml"],
args = ["--dir=tuned_examples/appo"]
)

#@OldAPIStack
py_test(
Expand Down Expand Up @@ -335,18 +325,16 @@ py_test(
)

# IMPALA
#@OldAPIStack
# py_test(
# name = "learning_tests_cartpole_impala_old_api_stack",
# main = "tests/run_regression_tests.py",
# tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"],
# size = "large",
# srcs = ["tests/run_regression_tests.py"],
# data = ["tuned_examples/impala/cartpole-impala.yaml"],
# args = ["--dir=tuned_examples/impala"]
# )
# Learning test that uses the IMPALA CartPole tuned-example script itself as
# the test entry point (`main` and `srcs` both point at the script).
# It is run with --as-test on the new API stack; the "torch_only" tag marks it
# as a torch-framework test.
py_test(
name = "learning_tests_cartpole_impala",
main = "tuned_examples/impala/cartpole_impala.py",
tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "torch_only"],
size = "large",
srcs = ["tuned_examples/impala/cartpole_impala.py"],
args = ["--as-test", "--enable-new-api-stack"]
)

#@OldAPIStack
#@OldAPIStack
py_test(
name = "learning_tests_cartpole_separate_losses_impala_old_api_stack",
main = "tests/run_regression_tests.py",
Expand Down Expand Up @@ -578,12 +566,6 @@ py_test(
size = "large",
srcs = ["algorithms/appo/tests/test_appo.py"]
)
py_test(
name = "test_appo_off_policyness",
tags = ["team:rllib", "algorithms_dir", "multi_gpu", "exclusive"],
size = "large",
srcs = ["algorithms/appo/tests/test_appo_off_policyness.py"]
)
py_test(
name = "test_appo_learner",
tags = ["team:rllib", "algorithms_dir"],
Expand Down Expand Up @@ -632,7 +614,7 @@ py_test(
srcs = ["algorithms/dreamerv3/tests/test_dreamerv3.py"]
)

# Impala
# IMPALA
py_test(
name = "test_impala",
tags = ["team:rllib", "algorithms_dir"],
Expand All @@ -651,18 +633,6 @@ py_test(
size = "small",
srcs = ["algorithms/impala/tests/test_vtrace_v2.py"]
)
py_test(
name = "test_impala_off_policyness",
tags = ["team:rllib", "algorithms_dir", "exclusive"],
size = "large",
srcs = ["algorithms/impala/tests/test_impala_off_policyness.py"]
)
py_test(
name = "test_impala_learner",
tags = ["team:rllib", "algorithms_dir"],
size = "medium",
srcs = ["algorithms/impala/tests/test_impala_learner.py"]
)

# MARWIL
py_test(
Expand Down Expand Up @@ -739,58 +709,49 @@ py_test(
# Tag: memory_leak_tests
# --------------------------------------------------------------------

# @OldAPIStack
py_test(
name = "test_memory_leak_appo",
name = "test_memory_leak_appo_old_api_stack",
tags = ["team:rllib", "memory_leak_tests"],
main = "utils/tests/run_memory_leak_tests.py",
size = "large",
srcs = ["utils/tests/run_memory_leak_tests.py"],
data = ["tuned_examples/appo/memory-leak-test-appo.yaml"],
args = ["--dir=tuned_examples/appo"]
)

# @OldAPIStack
py_test(
name = "test_memory_leak_dqn",
name = "test_memory_leak_dqn_old_api_stack",
tags = ["team:rllib", "memory_leak_tests"],
main = "utils/tests/run_memory_leak_tests.py",
size = "large",
srcs = ["utils/tests/run_memory_leak_tests.py"],
data = ["tuned_examples/dqn/memory-leak-test-dqn.yaml"],
args = ["--dir=tuned_examples/dqn"]
)

# @OldAPIStack
py_test(
name = "test_memory_leak_impala",
name = "test_memory_leak_impala_old_api_stack",
tags = ["team:rllib", "memory_leak_tests"],
main = "utils/tests/run_memory_leak_tests.py",
size = "large",
srcs = ["utils/tests/run_memory_leak_tests.py"],
data = ["tuned_examples/impala/memory-leak-test-impala.yaml"],
args = ["--dir=tuned_examples/impala"]
)

# @OldAPIStack
py_test(
name = "test_memory_leak_ppo",
name = "test_memory_leak_ppo_old_api_stack",
tags = ["team:rllib", "memory_leak_tests"],
main = "utils/tests/run_memory_leak_tests.py",
size = "large",
srcs = ["utils/tests/run_memory_leak_tests.py"],
data = ["tuned_examples/ppo/memory-leak-test-ppo.yaml"],
args = ["--dir=tuned_examples/ppo"]
)

py_test(
name = "test_memory_leak_ppo_new_stack",
tags = ["team:rllib", "memory_leak_tests"],
main = "utils/tests/run_memory_leak_tests.py",
size = "large",
srcs = ["utils/tests/run_memory_leak_tests.py"],
data = ["tuned_examples/ppo/memory_leak_test_ppo_new_stack.py"],
args = ["--dir=tuned_examples/ppo", "--to-check=rollout_worker"]
)

# @OldAPIStack
py_test(
name = "test_memory_leak_sac",
name = "test_memory_leak_sac_old_api_stack",
tags = ["team:rllib", "memory_leak_tests"],
main = "utils/tests/run_memory_leak_tests.py",
size = "large",
Expand Down Expand Up @@ -2240,6 +2201,7 @@ py_test(
# subdirectory: connectors/
# ....................................
# Framestacking examples only run in smoke-test mode (a few iters only).
# PPO
py_test(
name = "examples/connectors/frame_stacking_ppo",
main = "examples/connectors/frame_stacking.py",
Expand All @@ -2257,8 +2219,26 @@ py_test(
srcs = ["examples/connectors/frame_stacking.py"],
args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iter=2", "--framework=torch", "--algo=PPO", "--num-env-runners=4", "--num-cpus=6"]
)
# IMPALA
# Frame-stacking connector example run with IMPALA (torch) on the new API stack.
# No --num-agents flag is passed; the run is capped at 2 iterations via
# --stop-iter=2, so this is a smoke test only.
py_test(
name = "examples/connectors/frame_stacking_impala",
main = "examples/connectors/frame_stacking.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "medium",
srcs = ["examples/connectors/frame_stacking.py"],
args = ["--enable-new-api-stack", "--stop-iter=2", "--framework=torch", "--algo=IMPALA"]
)
# Multi-agent variant (--num-agents=2) of the IMPALA frame-stacking smoke test.
# Requests 4 env runners and 6 CPUs; still capped at 2 iterations.
py_test(
name = "examples/connectors/frame_stacking_multi_agent_impala",
main = "examples/connectors/frame_stacking.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "medium",
srcs = ["examples/connectors/frame_stacking.py"],
args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iter=2", "--framework=torch", "--algo=IMPALA", "--num-env-runners=4", "--num-cpus=6"]
)

# Nested observation spaces (flattening).
# PPO
py_test(
name = "examples/connectors/flatten_observations_dict_space_ppo",
main = "examples/connectors/flatten_observations_dict_space.py",
Expand All @@ -2267,7 +2247,6 @@ py_test(
srcs = ["examples/connectors/flatten_observations_dict_space.py"],
args = ["--enable-new-api-stack", "--as-test", "--stop-reward=400.0", "--framework=torch", "--algo=PPO"]
)

py_test(
name = "examples/connectors/flatten_observations_dict_space_multi_agent_ppo",
main = "examples/connectors/flatten_observations_dict_space.py",
Expand All @@ -2276,6 +2255,23 @@ py_test(
srcs = ["examples/connectors/flatten_observations_dict_space.py"],
args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=800.0", "--framework=torch", "--algo=PPO"]
)
# IMPALA
# Dict-observation flattening example run with IMPALA (torch) on the new API
# stack, in --as-test mode with --stop-reward=400.0 and --stop-timesteps=2000000.
# NOTE(review): presumably the reward must be reached within the timestep
# budget for the test to pass; confirm against the example script.
py_test(
name = "examples/connectors/flatten_observations_dict_space_impala",
main = "examples/connectors/flatten_observations_dict_space.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "large",
srcs = ["examples/connectors/flatten_observations_dict_space.py"],
args = ["--enable-new-api-stack", "--as-test", "--stop-reward=400.0", "--stop-timesteps=2000000", "--framework=torch", "--algo=IMPALA"]
)
# Multi-agent variant (--num-agents=2) of the IMPALA dict-observation
# flattening test; the reward threshold is raised to --stop-reward=800.0 while
# the --stop-timesteps=2000000 budget stays the same.
py_test(
name = "examples/connectors/flatten_observations_dict_space_multi_agent_impala",
main = "examples/connectors/flatten_observations_dict_space.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "large",
srcs = ["examples/connectors/flatten_observations_dict_space.py"],
args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=800.0", "--stop-timesteps=2000000", "--framework=torch", "--algo=IMPALA"]
)

# Prev-r/prev actions + LSTM example.
py_test(
Expand All @@ -2297,6 +2293,7 @@ py_test(
)

# MeanStd filtering example.
# PPO
py_test(
name = "examples/connectors/mean_std_filtering_ppo",
main = "examples/connectors/mean_std_filtering.py",
Expand All @@ -2314,6 +2311,25 @@ py_test(
srcs = ["examples/connectors/mean_std_filtering.py"],
args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=-600.0", "--framework=torch", "--algo=PPO", "--num-env-runners=5", "--num-cpus=7"]
)
# IMPALA
# TODO (sven): Make IMPALA learn Pendulum OR make this script flexible to accept
# (lopsided obs) CartPole as well.
# py_test(
# name = "examples/connectors/mean_std_filtering_impala",
# main = "examples/connectors/mean_std_filtering.py",
# tags = ["team:rllib", "exclusive", "examples"],
# size = "medium",
# srcs = ["examples/connectors/mean_std_filtering.py"],
# args = ["--enable-new-api-stack", "--as-test", "--stop-reward=-300.0", "--framework=torch", "--algo=IMPALA", "--num-env-runners=2"]
# )
# py_test(
# name = "examples/connectors/mean_std_filtering_multi_agent_impala",
# main = "examples/connectors/mean_std_filtering.py",
# tags = ["team:rllib", "exclusive", "examples"],
# size = "medium",
# srcs = ["examples/connectors/mean_std_filtering.py"],
# args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=-600.0", "--framework=torch", "--algo=IMPALA", "--num-env-runners=5", "--num-cpus=6"]
# )

# subdirectory: curriculum/
# ....................................
Expand Down Expand Up @@ -3015,7 +3031,6 @@ py_test(
args = ["--run=IMPALA", "--as-test", "--framework=torch", "--stop-reward=28", "--num-cpus=4"]
)

# TODO (Kourosh): tf2 ~5x slower compared to torch on the new stack
py_test(
name = "examples/cartpole_lstm_ppo_tf2",
main = "examples/cartpole_lstm.py",
Expand Down
Loading

0 comments on commit 231a013

Please sign in to comment.