[RLlib] Fix SAC/DQN/CQL GPU and multi-GPU. #47179
@@ -349,6 +349,30 @@ py_test(
     srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
     args = ["--as-test", "--enable-new-api-stack"]
 )
+py_test(
+    name = "learning_tests_cartpole_dqn_gpu",
+    main = "tuned_examples/dqn/cartpole_dqn.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
+    size = "large",
+    srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
+)
+py_test(
+    name = "learning_tests_cartpole_dqn_multi_cpu",
+    main = "tuned_examples/dqn/cartpole_dqn.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
+    size = "large",
+    srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
+)
+py_test(
+    name = "learning_tests_cartpole_dqn_multi_gpu",
+    main = "tuned_examples/dqn/cartpole_dqn.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
+    size = "large",
+    srcs = ["tuned_examples/dqn/cartpole_dqn.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
+)
 # MultiAgentCartPole
 py_test(
     name = "learning_tests_multi_agent_cartpole_dqn",
@@ -358,16 +382,29 @@ py_test(
     srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
     args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-cpus=4"]
 )
-
-#@OldAPIStack
-py_test(
-    name = "learning_tests_cartpole_dqn_softq_old_api_stack",
-    main = "tests/run_regression_tests.py",
-    tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete"],
-    size = "large",  # bazel may complain about it being too long sometimes - large is on purpose as some frameworks take longer
-    srcs = ["tests/run_regression_tests.py"],
-    data = ["tuned_examples/dqn/cartpole-dqn-softq.yaml"],
-    args = ["--dir=tuned_examples/dqn"]
-)
+py_test(
+    name = "learning_tests_multi_agent_cartpole_dqn_gpu",
+    main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "gpu"],
+    size = "large",
+    srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-cpus=4", "--num-gpus=1"]
+)

[Review comment] Interesting, I thought this does not work.

[Reply] Good point. We need to get rid of this confusion some time soon. Note that these are the command line options, not directly translatable to Algo config properties: …
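To make the CLI-vs-config distinction concrete, here is a minimal sketch of how a script's `--num-gpus` flag could be translated into new-API-stack config properties. The flag-to-config mapping below is an assumption for illustration only; the tuned examples' own utilities handle this internally and may map the flag differently:

```python
# Hypothetical sketch: mapping a "--num-gpus" CLI flag onto AlgorithmConfig
# properties. The parser and the mapping are illustrative assumptions, not
# the code used by the tuned examples in this PR.
import argparse

from ray.rllib.algorithms.dqn import DQNConfig

parser = argparse.ArgumentParser()
parser.add_argument("--num-gpus", type=int, default=0)
args = parser.parse_args()

config = (
    DQNConfig()
    .environment("CartPole-v1")
    # One remote Learner per requested GPU; with 0 GPUs, a single
    # local (CPU-based) Learner is used.
    .learners(
        num_learners=args.num_gpus,
        num_gpus_per_learner=1 if args.num_gpus > 0 else 0,
    )
)
```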
+py_test(
+    name = "learning_tests_multi_agent_cartpole_dqn_multi_cpu",
+    main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core"],
+    size = "large",
+    srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-cpus=5", "--num-gpus=2"]
+)
+py_test(
+    name = "learning_tests_multi_agent_cartpole_dqn_multi_gpu",
+    main = "tuned_examples/dqn/multi_agent_cartpole_dqn.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu"],
+    size = "large",
+    srcs = ["tuned_examples/dqn/multi_agent_cartpole_dqn.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-cpus=4", "--num-gpus=2"]
+)

 # IMPALA
@@ -669,7 +706,31 @@ py_test(
     srcs = ["tuned_examples/sac/pendulum_sac.py"],
     args = ["--as-test", "--enable-new-api-stack"]
 )
+py_test(
+    name = "learning_tests_pendulum_sac_gpu",
+    main = "tuned_examples/sac/pendulum_sac.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_continuous", "gpu"],
+    size = "large",
+    srcs = ["tuned_examples/sac/pendulum_sac.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=1"]
+)
+py_test(
+    name = "learning_tests_pendulum_sac_multi_cpu",
+    main = "tuned_examples/sac/pendulum_sac.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_continuous"],
+    size = "large",
+    srcs = ["tuned_examples/sac/pendulum_sac.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
+)
+py_test(
+    name = "learning_tests_pendulum_sac_multi_gpu",
+    main = "tuned_examples/sac/pendulum_sac.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_continuous", "multi_gpu"],
+    size = "large",
+    srcs = ["tuned_examples/sac/pendulum_sac.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-gpus=2"]
+)
 # MultiAgentPendulum
 py_test(
     name = "learning_tests_multi_agent_pendulum_sac",
     main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
@@ -678,7 +739,22 @@ py_test(
     srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
     args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-cpus=4"]
 )
+py_test(
+    name = "learning_tests_multi_agent_pendulum_sac_gpu",
+    main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_continuous", "gpu"],
+    size = "large",
+    srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],
+    args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-cpus=4", "--num-gpus=1"]
+)
+py_test(
+    name = "learning_tests_multi_agent_pendulum_sac_multi_cpu",
+    main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
+    tags = ["team:rllib", "exclusive", "learning_tests", "torch_only", "learning_tests_continuous"],
+    size = "large",
+    srcs = ["tuned_examples/sac/multi_agent_pendulum_sac.py"],

[Review comment] Do we actually need the …

+    args = ["--enable-new-api-stack", "--num-agents=2", "--num-gpus=2"]
+)
 py_test(
     name = "learning_tests_multi_agent_pendulum_sac_multi_gpu",
     main = "tuned_examples/sac/multi_agent_pendulum_sac.py",
@@ -3240,7 +3316,7 @@ py_test(
     name = "examples/rl_modules/custom_lstm_rl_module",
     main = "examples/rl_modules/custom_lstm_rl_module.py",
     tags = ["team:rllib", "examples"],
-    size = "medium",
+    size = "large",
     srcs = ["examples/rl_modules/custom_lstm_rl_module.py"],
     args = ["--as-test", "--enable-new-api-stack"],
 )
(diff for second changed file; filename not shown in this view)
@@ -58,6 +58,7 @@
     NUM_MODULE_STEPS_TRAINED,
     NUM_MODULE_STEPS_TRAINED_LIFETIME,
     NUM_TARGET_UPDATES,
+    REPLAY_BUFFER_ADD_DATA_TIMER,
     REPLAY_BUFFER_SAMPLE_TIMER,
     REPLAY_BUFFER_UPDATE_PRIOS_TIMER,
     SAMPLE_TIMER,
@@ -556,7 +557,7 @@ def calculate_rr_weights(config: AlgorithmConfig) -> List[float]:
     # This is to set freshly rollout-collected data in relation to
     # the data we pull from the replay buffer (which also contains old
     # samples).
-    native_ratio = config.train_batch_size / (
+    native_ratio = config.total_train_batch_size / (
         config.get_rollout_fragment_length()
         * config.num_envs_per_env_runner
         # Add one to workers because the local
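As a sanity check on the ratio itself, here is a worked example with made-up numbers. The `(num_env_runners + 1)` factor is inferred from the truncated comment above about adding one for the local worker; all values below are illustrative, not defaults:

```python
# Illustrative arithmetic only; none of these numbers are defaults.
total_train_batch_size = 256    # e.g., 2 learners x 128 samples each
rollout_fragment_length = 4
num_envs_per_env_runner = 8
num_env_runners = 2             # +1 below for the local worker

native_ratio = total_train_batch_size / (
    rollout_fragment_length * num_envs_per_env_runner * (num_env_runners + 1)
)
print(native_ratio)  # 256 / (4 * 8 * 3) ~= 2.67
```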
@@ -628,13 +629,15 @@ def _training_step_new_api_stack(self, *, with_noise_reset) -> ResultDict:
                 _uses_new_env_runners=True,
                 _return_metrics=True,
             )
-        # Add the sampled experiences to the replay buffer.
-        self.local_replay_buffer.add(episodes)
         # Reduce EnvRunner metrics over the n EnvRunners.
         self.metrics.merge_and_log_n_dicts(
             env_runner_results, key=ENV_RUNNER_RESULTS
         )
+        # Add the sampled experiences to the replay buffer.
+        with self.metrics.log_time((TIMERS, REPLAY_BUFFER_ADD_DATA_TIMER)):
+            self.local_replay_buffer.add(episodes)

[Review comment] Nice :)

         self.metrics.log_dict(
             self.metrics.peek(
                 (ENV_RUNNER_RESULTS, NUM_AGENT_STEPS_SAMPLED), default={}
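For readers unfamiliar with the `log_time` pattern the reviewer is applauding: it is a context manager that times the wrapped block and logs the elapsed time under the given key. A minimal stand-in sketch follows; this is not RLlib's `MetricsLogger` implementation, which additionally does windowing and smoothing:

```python
import time
from contextlib import contextmanager

# Minimal stand-in for a log_time()-style context manager. RLlib's
# MetricsLogger has its own, richer implementation.
@contextmanager
def log_time(metrics: dict, key: str):
    start = time.perf_counter()
    try:
        yield
    finally:
        # Record elapsed wall-clock time under the given key.
        metrics[key] = time.perf_counter() - start

metrics = {}
with log_time(metrics, "replay_buffer_add_data_timer"):
    sum(range(1_000_000))  # stand-in for buffer.add(episodes)
print(metrics)
```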
@@ -684,7 +687,7 @@ def _training_step_new_api_stack(self, *, with_noise_reset) -> ResultDict:
         # Sample a list of episodes used for learning from the replay buffer.
         with self.metrics.log_time((TIMERS, REPLAY_BUFFER_SAMPLE_TIMER)):
             episodes = self.local_replay_buffer.sample(
-                num_items=self.config.train_batch_size,
+                num_items=self.config.total_train_batch_size,
                 n_step=self.config.n_step,
                 gamma=self.config.gamma,
                 beta=self.config.replay_buffer_config.get("beta"),
@@ -707,14 +710,16 @@ def _training_step_new_api_stack(self, *, with_noise_reset) -> ResultDict:
             # disk or WandB, they might be very large).
             td_errors = defaultdict(list)
             for res in learner_results:
-                for mid, m_res in res.items():
-                    if TD_ERROR_KEY in m_res:
-                        td_errors[mid].extend(
-                            convert_to_numpy(m_res.pop(TD_ERROR_KEY).peek())
+                for module_id, module_results in res.items():
+                    if TD_ERROR_KEY in module_results:
+                        td_errors[module_id].extend(
+                            convert_to_numpy(
+                                module_results.pop(TD_ERROR_KEY).peek()
+                            )
                         )
             td_errors = {
-                mid: {TD_ERROR_KEY: np.concatenate(s, axis=0)}
-                for mid, s in td_errors.items()
+                module_id: {TD_ERROR_KEY: np.concatenate(s, axis=0)}
+                for module_id, s in td_errors.items()
             }
             self.metrics.merge_and_log_n_dicts(
                 learner_results, key=LEARNER_RESULTS
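The change above is a rename (`mid` → `module_id`, `m_res` → `module_results`) plus a reflow of the `convert_to_numpy` call. Stripped of RLlib types, the aggregation pattern is: gather per-module TD-error arrays across all learner results, then concatenate each module's list into a single array. A self-contained sketch with toy data (the dict shapes are assumptions for illustration):

```python
from collections import defaultdict

import numpy as np

TD_ERROR_KEY = "td_error"

# Toy stand-ins for per-learner result dicts, keyed by module ID.
learner_results = [
    {"policy_0": {TD_ERROR_KEY: np.array([0.1, 0.2])}},
    {"policy_0": {TD_ERROR_KEY: np.array([0.3])}},
]

# Gather TD errors per module across all learner results ...
td_errors = defaultdict(list)
for res in learner_results:
    for module_id, module_results in res.items():
        if TD_ERROR_KEY in module_results:
            td_errors[module_id].append(module_results.pop(TD_ERROR_KEY))

# ... then concatenate each module's arrays into one.
td_errors = {
    module_id: {TD_ERROR_KEY: np.concatenate(arrs, axis=0)}
    for module_id, arrs in td_errors.items()
}
print(td_errors)  # {'policy_0': {'td_error': array([0.1, 0.2, 0.3])}}
```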
@@ -812,7 +817,7 @@ def _training_step_old_and_hybrid_api_stack(self) -> ResultDict:
         # Sample training batch (MultiAgentBatch) from replay buffer.
         train_batch = sample_min_n_steps_from_buffer(
             self.local_replay_buffer,
-            self.config.train_batch_size,
+            self.config.total_train_batch_size,
             count_by_agent_steps=self.config.count_steps_by == "agent_steps",
         )
[Review comment] Does num-gpus=1 use a local or remote learner? Imo, we should test with both. What do you think @sven1977?

[Reply] For IMPALA/APPO, we should add a validation that these should never be run with a local Learner, b/c these are async algos that suffer tremendously from having the Learner not-async. Will add this check/error in a separate PR ...
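For context on the local-vs-remote question: on the new API stack the Learner topology is set via `AlgorithmConfig.learners()`, where `num_learners=0` means a single local Learner in the main process and `num_learners >= 1` means remote Learner actors. A sketch of both setups follows (values are illustrative; whether `--num-gpus=1` produces the first or the second is exactly what the reviewer is asking):

```python
from ray.rllib.algorithms.sac import SACConfig

# Local Learner: no remote actors; updates run in the main algorithm process.
local_learner = SACConfig().learners(num_learners=0, num_gpus_per_learner=1)

# Remote Learner: one Learner actor, with one GPU assigned to it.
remote_learner = SACConfig().learners(num_learners=1, num_gpus_per_learner=1)
```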