Revert [rllib] Reserve CPUs for replay actors in apex (#4404)
* Revert "[rllib] Reserve CPUs for replay actors in apex (#4217)"

This reverts commit 2781d74.

* comment
ericl authored Mar 19, 2019
1 parent c93eb12 commit c6f15a0
Showing 3 changed files with 4 additions and 19 deletions.
16 changes: 0 additions & 16 deletions python/ray/rllib/agents/dqn/dqn.py
@@ -12,7 +12,6 @@
 from ray.rllib.evaluation.metrics import collect_metrics
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.schedules import ConstantSchedule, LinearSchedule
-from ray.tune.trial import Resources
 
 logger = logging.getLogger(__name__)
 
@@ -142,21 +141,6 @@ class DQNAgent(Agent):
     _policy_graph = DQNPolicyGraph
     _optimizer_shared_configs = OPTIMIZER_SHARED_CONFIGS
 
-    @classmethod
-    @override(Agent)
-    def default_resource_request(cls, config):
-        cf = dict(cls._default_config, **config)
-        Agent._validate_config(cf)
-        if cf["optimizer_class"] == "AsyncReplayOptimizer":
-            extra = cf["optimizer"]["num_replay_buffer_shards"]
-        else:
-            extra = 0
-        return Resources(
-            cpu=cf["num_cpus_for_driver"],
-            gpu=cf["num_gpus"],
-            extra_cpu=cf["num_cpus_per_worker"] * cf["num_workers"] + extra,
-            extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"])
-
     @override(Agent)
     def _init(self):
         self._validate_config()
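For context, the removed default_resource_request computed a single Resources bundle for the trial from the agent config. A rough sketch of that arithmetic follows; the config values are hypothetical APEX-style numbers chosen for illustration, not values taken from this commit:

    # Hypothetical config for illustration only; the formula mirrors the
    # method deleted above.
    cf = {
        "num_cpus_for_driver": 1,
        "num_gpus": 1,
        "num_workers": 32,
        "num_cpus_per_worker": 1,
        "num_gpus_per_worker": 0,
        "optimizer_class": "AsyncReplayOptimizer",
        "optimizer": {"num_replay_buffer_shards": 4},
    }
    extra = cf["optimizer"]["num_replay_buffer_shards"]  # one CPU per replay shard
    extra_cpu = cf["num_cpus_per_worker"] * cf["num_workers"] + extra  # 32 + 4 = 36
    extra_gpu = cf["num_gpus_per_worker"] * cf["num_workers"]  # 0
    # Equivalent to Resources(cpu=1, gpu=1, extra_cpu=36, extra_gpu=0)

With the revert, the replay shards no longer add to the reserved CPU count, so DQNAgent falls back to the base Agent resource request.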
5 changes: 3 additions & 2 deletions python/ray/rllib/optimizers/async_replay_optimizer.py
@@ -230,8 +230,7 @@ def _step(self):
         return sample_timesteps, train_timesteps
 
 
-# reserve 1 CPU so that our method calls don't get stalled
-@ray.remote(num_cpus=1)
+@ray.remote(num_cpus=0)
 class ReplayActor(object):
     """A replay buffer shard.
@@ -317,6 +316,8 @@ def stats(self, debug=False):
         return stat
 
 
+# note: we set num_cpus=0 to avoid failing to create replay actors when
+# resources are fragmented. This isn't ideal.
 @ray.remote(num_cpus=0)
 class BatchReplayActor(object):
     """The batch replay version of the replay actor.
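To illustrate why the decorator goes back to num_cpus=0: an actor that requests zero CPUs can always be scheduled, even when worker tasks already hold every CPU slot, which is the fragmentation problem the note above refers to. A minimal sketch of that behavior on a local Ray cluster; the actor name below is made up for illustration:

    import ray

    ray.init(num_cpus=2)

    # Hypothetical actor for illustration. With num_cpus=0 it does not compete
    # for a CPU slot, so creating it succeeds even if workers or other actors
    # already hold both CPUs; with num_cpus=1 it would wait for a free CPU.
    @ray.remote(num_cpus=0)
    class ShardLikeActor(object):
        def ping(self):
            return "ok"

    actor = ShardLikeActor.remote()
    print(ray.get(actor.ping.remote()))  # -> "ok"

The trade-off, as the added comment says, is that the replay actors' work is no longer accounted for in Ray's CPU bookkeeping.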
2 changes: 1 addition & 1 deletion python/ray/rllib/tests/test_supported_spaces.py
@@ -105,7 +105,7 @@ def check_support_multiagent(alg, config):
 
 class ModelSupportedSpaces(unittest.TestCase):
     def setUp(self):
-        ray.init(num_cpus=10)
+        ray.init(num_cpus=4)
 
     def tearDown(self):
         ray.shutdown()
