[RLlib] Tf2 + eager-tracing same speed as framework=tf; Add more test coverage for tf2+tracing. (#19981)
sven1977 authored Nov 5, 2021
1 parent 1341bb5 commit a931076
Showing 25 changed files with 482 additions and 349 deletions.
2 changes: 1 addition & 1 deletion rllib/BUILD
@@ -148,7 +148,7 @@ py_test(
 )
 
 py_test(
-    name = "run_regression_tests_frozenlake_appo",
+    name = "learning_frozenlake_appo",
     main = "tests/run_regression_tests.py",
     tags = ["team:ml", "learning_tests", "learning_tests_discrete"],
     size = "large",
4 changes: 2 additions & 2 deletions rllib/agents/a3c/tests/test_a2c.py
@@ -24,8 +24,8 @@ def test_a2c_compilation(self):
         num_iterations = 1
 
         # Test against all frameworks.
-        for _ in framework_iterator(config):
-            for env in ["PongDeterministic-v0"]:
+        for _ in framework_iterator(config, with_eager_tracing=True):
+            for env in ["CartPole-v0", "Pendulum-v1", "PongDeterministic-v0"]:
                 trainer = a3c.A2CTrainer(config=config, env=env)
                 for i in range(num_iterations):
                     results = trainer.train()
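
The `with_eager_tracing=True` flag is what adds the new tf2+tracing coverage: the test body runs once per framework and, for the eager TF frameworks, once more with `eager_tracing` enabled. Below is a minimal, self-contained sketch of that idea; `framework_iterator_sketch` is an illustrative stand-in, not RLlib's actual `framework_iterator` implementation.

    # Illustrative sketch only -- not RLlib's framework_iterator().
    # For the eager TF frameworks ("tf2"/"tfe") it yields the config twice,
    # once with tracing off and once with tracing on, so the same test body
    # exercises both code paths.
    def framework_iterator_sketch(config, frameworks=("tf2", "tf", "torch"),
                                  with_eager_tracing=False):
        for fw in frameworks:
            config["framework"] = fw
            if with_eager_tracing and fw in ("tf2", "tfe"):
                for tracing in (False, True):
                    config["eager_tracing"] = tracing
                    yield fw
            else:
                config["eager_tracing"] = False
                yield fw


    # Usage: everything nested under the `for` runs once per combination.
    config = {}
    for fw in framework_iterator_sketch(config, with_eager_tracing=True):
        print(fw, config["eager_tracing"])
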
2 changes: 1 addition & 1 deletion rllib/agents/a3c/tests/test_a3c.py
@@ -27,7 +27,7 @@ def test_a3c_compilation(self):
         num_iterations = 1
 
         # Test against all frameworks.
-        for _ in framework_iterator(config):
+        for _ in framework_iterator(config, with_eager_tracing=True):
             for env in ["CartPole-v1", "Pendulum-v1", "PongDeterministic-v0"]:
                 print("env={}".format(env))
                 config["model"]["use_lstm"] = env == "CartPole-v1"
2 changes: 1 addition & 1 deletion rllib/agents/cql/tests/test_cql.py
@@ -66,7 +66,7 @@ def test_cql_compilation(self):
         num_iterations = 4
 
         # Test for tf/torch frameworks.
-        for fw in framework_iterator(config):
+        for fw in framework_iterator(config, with_eager_tracing=True):
             trainer = cql.CQLTrainer(config=config)
             for i in range(num_iterations):
                 results = trainer.train()
2 changes: 1 addition & 1 deletion rllib/agents/ddpg/tests/test_apex_ddpg.py
@@ -24,7 +24,7 @@ def test_apex_ddpg_compilation_and_per_worker_epsilon_values(self):
         config["learning_starts"] = 0
         config["optimizer"]["num_replay_buffer_shards"] = 1
         num_iterations = 1
-        for _ in framework_iterator(config):
+        for _ in framework_iterator(config, with_eager_tracing=True):
             plain_config = config.copy()
             trainer = apex_ddpg.ApexDDPGTrainer(
                 config=plain_config, env="Pendulum-v1")
2 changes: 1 addition & 1 deletion rllib/agents/ddpg/tests/test_ddpg.py
@@ -41,7 +41,7 @@ def test_ddpg_compilation(self):
         num_iterations = 1
 
         # Test against all frameworks.
-        for _ in framework_iterator(config):
+        for _ in framework_iterator(config, with_eager_tracing=True):
             trainer = ddpg.DDPGTrainer(config=config, env="Pendulum-v1")
             for i in range(num_iterations):
                 results = trainer.train()
2 changes: 1 addition & 1 deletion rllib/agents/dqn/tests/test_apex_dqn.py
@@ -44,7 +44,7 @@ def test_apex_dqn_compilation_and_per_worker_epsilon_values(self):
         config["min_iter_time_s"] = 1
         config["optimizer"]["num_replay_buffer_shards"] = 1
 
-        for _ in framework_iterator(config):
+        for _ in framework_iterator(config, with_eager_tracing=True):
             plain_config = config.copy()
             trainer = apex.ApexTrainer(config=plain_config, env="CartPole-v0")
 
2 changes: 1 addition & 1 deletion rllib/agents/dqn/tests/test_simple_q.py
@@ -34,7 +34,7 @@ def test_simple_q_compilation(self):
 
         num_iterations = 2
 
-        for _ in framework_iterator(config):
+        for _ in framework_iterator(config, with_eager_tracing=True):
             trainer = dqn.SimpleQTrainer(config=config, env="CartPole-v0")
             rw = trainer.workers.local_worker()
             for i in range(num_iterations):
2 changes: 1 addition & 1 deletion rllib/agents/impala/tests/test_impala.py
@@ -30,7 +30,7 @@ def test_impala_compilation(self):
         num_iterations = 1
         env = "CartPole-v0"
 
-        for _ in framework_iterator(config):
+        for _ in framework_iterator(config, with_eager_tracing=True):
             local_cfg = config.copy()
             for lstm in [False, True]:
                 local_cfg["num_aggregation_workers"] = 0 if not lstm else 1
2 changes: 1 addition & 1 deletion rllib/agents/ppo/tests/test_appo.py
@@ -24,7 +24,7 @@ def test_appo_compilation(self):
         config["num_workers"] = 1
         num_iterations = 2
 
-        for _ in framework_iterator(config):
+        for _ in framework_iterator(config, with_eager_tracing=True):
             print("w/o v-trace")
             _config = config.copy()
             _config["vtrace"] = False
2 changes: 1 addition & 1 deletion rllib/agents/ppo/tests/test_ppo.py
@@ -106,7 +106,7 @@ def test_ppo_compilation_and_schedule_mixins(self):
         config["compress_observations"] = True
         num_iterations = 2
 
-        for fw in framework_iterator(config):
+        for fw in framework_iterator(config, with_eager_tracing=True):
             for env in ["FrozenLake-v1", "MsPacmanNoFrameskip-v4"]:
                 print("Env={}".format(env))
                 for lstm in [True, False]:
19 changes: 14 additions & 5 deletions rllib/agents/trainer.py
@@ -230,14 +230,23 @@
 
     # === Deep Learning Framework Settings ===
     # tf: TensorFlow (static-graph)
-    # tf2: TensorFlow 2.x (eager)
-    # tfe: TensorFlow eager
+    # tf2: TensorFlow 2.x (eager or traced, if eager_tracing=True)
+    # tfe: TensorFlow eager (or traced, if eager_tracing=True)
     # torch: PyTorch
     "framework": "tf",
-    # Enable tracing in eager mode. This greatly improves performance, but
-    # makes it slightly harder to debug since Python code won't be evaluated
-    # after the initial eager pass. Only possible if framework=tfe.
+    # Enable tracing in eager mode. This greatly improves performance
+    # (speedup ~2x), but makes it slightly harder to debug since Python
+    # code won't be evaluated after the initial eager pass.
+    # Only possible if framework=[tf2|tfe].
     "eager_tracing": False,
+    # Maximum number of tf.function re-traces before a runtime error is raised.
+    # This is to prevent unnoticed retraces of methods inside the
+    # `..._eager_traced` Policy, which could slow down execution by a
+    # factor of 4, without the user noticing what the root cause for this
+    # slowdown could be.
+    # Only necessary for framework=[tf2|tfe].
+    # Set to None to ignore the re-trace count and never throw an error.
+    "eager_max_retraces": 20,
 
     # === Exploration Settings ===
     # Default exploration behavior, iff `explore`=None is passed into
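
Putting the three framework-related keys together, a tf2 setup that uses the traced eager path described above could look like the sketch below. The key names and defaults come straight from this diff; the trainer class and environment in the commented usage are only examples.

    # Sketch of a config using the tf2 + eager-tracing path from this commit.
    config = {
        "framework": "tf2",
        # Trace the eager policy with tf.function for roughly the speed of
        # the static-graph ("tf") path.
        "eager_tracing": True,
        # Raise after 20 unnoticed tf.function re-traces (None disables the check).
        "eager_max_retraces": 20,
        "num_workers": 0,
    }

    # Hypothetical usage (requires a matching ray[rllib] install):
    # from ray.rllib.agents import ppo
    # trainer = ppo.PPOTrainer(config=config, env="CartPole-v0")
    # print(trainer.train())
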
18 changes: 11 additions & 7 deletions rllib/examples/env/random_env.py
@@ -10,8 +10,8 @@ class RandomEnv(gym.Env):
 
     Can be instantiated with arbitrary action-, observation-, and reward
     spaces. Observations and rewards are generated by simply sampling from the
-    observation/reward spaces. The probability of a `done=True` can be
-    configured as well.
+    observation/reward spaces. The probability of a `done=True` after each
+    action can be configured, as well as the max episode length.
     """
 
     def __init__(self, config=None):
@@ -26,8 +26,13 @@ def __init__(self, config=None):
             "reward_space",
             gym.spaces.Box(low=-1.0, high=1.0, shape=(), dtype=np.float32))
         # Chance that an episode ends at any step.
+        # Note that a max episode length can be specified via
+        # `max_episode_len`.
         self.p_done = config.get("p_done", 0.1)
-        # A max episode length.
+        # A max episode length. Even if the `p_done` sampling does not lead
+        # to a terminus, the episode will end after at most this many
+        # timesteps.
+        # Set to 0 or None for using no limit on the episode length.
         self.max_episode_len = config.get("max_episode_len", None)
         # Whether to check action bounds.
         self.check_action_bounds = config.get("check_action_bounds", False)
@@ -49,11 +54,10 @@ def step(self, action):
 
         self.steps += 1
         done = False
-        # We are done as per our max-episode-len.
-        if self.max_episode_len is not None and \
-                self.steps >= self.max_episode_len:
+        # We are `done` as per our max-episode-len.
+        if self.max_episode_len and self.steps >= self.max_episode_len:
            done = True
-        # Max not reached yet -> Sample done via p_done.
+        # Max episode length not reached yet -> Sample `done` via `p_done`.
         elif self.p_done > 0.0:
             done = bool(
                 np.random.choice(
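
The two knobs touched here, `p_done` and `max_episode_len`, combine exactly as the new comments say: sampling can end the episode early, and the length cap always ends it. A usage sketch with `p_done=0.0`, so that only the cap terminates the episode (import path taken from the file location above; assumes a Ray/gym install containing this change):

    from ray.rllib.examples.env.random_env import RandomEnv

    # With p_done=0.0 the episode can only end via the max_episode_len cap.
    env = RandomEnv({
        "p_done": 0.0,
        "max_episode_len": 10,
    })

    obs = env.reset()
    done, steps = False, 0
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
        steps += 1
    print("episode length:", steps)  # expected: 10
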
3 changes: 2 additions & 1 deletion rllib/examples/models/batch_norm_model.py
@@ -196,7 +196,8 @@ def __init__(self, obs_space, action_space, num_outputs, model_config,
     def forward(self, input_dict, state, seq_lens):
         # Set the correct train-mode for our hidden module (only important
         # b/c we have some batch-norm layers).
-        self._hidden_layers.train(mode=input_dict.get("is_training", False))
+        self._hidden_layers.train(
+            mode=bool(input_dict.get("is_training", False)))
         self._hidden_out = self._hidden_layers(input_dict["obs"])
         logits = self._logits(self._hidden_out)
         return logits, []
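
The only change here is wrapping the `is_training` value in `bool()` before handing it to `nn.Module.train()`. A small standalone illustration of why the cast matters, under the assumption that the flag can arrive as something other than a plain Python bool (e.g. a tensor):

    import torch
    import torch.nn as nn

    bn = nn.BatchNorm1d(4)

    flag = torch.tensor(True)   # an `is_training` value that is not a plain bool
    bn.train(mode=bool(flag))   # cast first, as the fixed forward() now does
    print(bn.training)          # True

    # Passing the raw tensor instead would either set `training` to a tensor or
    # trip the boolean type check in newer torch versions, breaking batch-norm's
    # train/eval switching.
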
8 changes: 7 additions & 1 deletion rllib/execution/multi_gpu_impl.py
@@ -1,5 +1,11 @@
 from ray.rllib.policy.dynamic_tf_policy import TFMultiGPUTowerStack
 from ray.rllib.utils.deprecation import deprecation_warning
 
-deprecation_warning("LocalSyncParallelOptimizer", "TFMultiGPUTowerStack")
+# Backward compatibility.
+deprecation_warning(
+    old="ray.rllib.execution.multi_gpu_impl.LocalSyncParallelOptimizer",
+    new="ray.rllib.policy.dynamic_tf_policy.TFMultiGPUTowerStack",
+    error=False,
+)
+# Old name.
 LocalSyncParallelOptimizer = TFMultiGPUTowerStack
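
The expanded `deprecation_warning(old=..., new=..., error=False)` call plus the alias keeps old import paths working while pointing users at the new location. A usage sketch (assumes a Ray install containing this change):

    # Importing the old name still works; it resolves to the new class and
    # emits a deprecation warning instead of raising.
    from ray.rllib.execution.multi_gpu_impl import LocalSyncParallelOptimizer
    from ray.rllib.policy.dynamic_tf_policy import TFMultiGPUTowerStack

    assert LocalSyncParallelOptimizer is TFMultiGPUTowerStack
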
8 changes: 7 additions & 1 deletion rllib/execution/multi_gpu_learner.py
@@ -2,6 +2,12 @@
     MultiGPULearnerThread, _MultiGPULoaderThread
 from ray.rllib.utils.deprecation import deprecation_warning
 
-deprecation_warning("multi_gpu_learner.py", "multi_gpu_learner_thread.py")
+# Backward compatibility.
+deprecation_warning(
+    old="ray.rllib.execution.multi_gpu_learner.py",
+    new="ray.rllib.execution.multi_gpu_learner_thread.py",
+    error=False,
+)
+# Old names.
 TFMultiGPULearner = MultiGPULearnerThread
 _LoaderThread = _MultiGPULoaderThread
5 changes: 5 additions & 0 deletions rllib/models/modelv2.py
@@ -243,6 +243,11 @@ def __call__(
         with self.context():
             res = self.forward(restored, state or [], seq_lens)
 
+        if isinstance(input_dict, SampleBatch):
+            input_dict.accessed_keys = restored.accessed_keys - {"obs_flat"}
+            input_dict.deleted_keys = restored.deleted_keys
+            input_dict.added_keys = restored.added_keys - {"obs_flat"}
+
         if ((not isinstance(res, list) and not isinstance(res, tuple))
                 or len(res) != 2):
             raise ValueError(
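
The new block copies the key-tracking sets from the internal `restored` batch back onto the caller's `SampleBatch` (minus the synthetic `obs_flat` entry), so whoever inspects `input_dict` afterwards sees which keys the model actually touched. A toy sketch of that kind of access tracking, using a plain dict subclass rather than RLlib's `SampleBatch`:

    # Toy illustration of access tracking (not RLlib's SampleBatch).
    class TrackingDict(dict):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.accessed_keys = set()

        def __getitem__(self, key):
            self.accessed_keys.add(key)
            return super().__getitem__(key)


    restored = TrackingDict(obs=[1, 2, 3], obs_flat=[1, 2, 3], prev_actions=[0])
    _ = restored["obs"]
    _ = restored["obs_flat"]

    # Mirror what ModelV2.__call__ now does: report what the model used,
    # ignoring the auto-generated "obs_flat" view.
    print(restored.accessed_keys - {"obs_flat"})  # {'obs'}
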
4 changes: 2 additions & 2 deletions rllib/policy/dynamic_tf_policy.py
@@ -250,7 +250,7 @@ def __init__(
             True, (), name="is_exploring")
 
         # Placeholder for `is_training` flag.
-        self._input_dict.is_training = self._get_is_training_placeholder()
+        self._input_dict.set_training(self._get_is_training_placeholder())
 
         # Multi-GPU towers do not need any action computing/exploration
         # graphs.
@@ -464,7 +464,7 @@ def load_batch_into_buffer(
             buffer_index: int = 0,
     ) -> int:
         # Set the is_training flag of the batch.
-        batch.is_training = True
+        batch.set_training(True)
 
         # Shortcut for 1 CPU only: Store batch in
         # `self._loaded_single_cpu_batch`.
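
Both hunks in this file swap the direct `is_training` attribute assignment for the `SampleBatch.set_training()` setter. A short usage sketch (assumes a Ray version where `set_training()` exists, as used in this commit):

    from ray.rllib.policy.sample_batch import SampleBatch

    batch = SampleBatch({"obs": [[0.1, 0.2]], "actions": [0]})
    # New setter-based API used throughout this commit:
    batch.set_training(True)
    # Old style, now replaced (kept here only as a comment):
    # batch.is_training = True
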