Skip to content

Commit

Permalink
[Autoscaler][Local Node Provider] Log a warning if max_workers < len(…
Browse files Browse the repository at this point in the history
…worker_ips) (#24635)

Logs a warning when a user sets max_workers for local node provider less than the number of available ips.

Also removes defaults of 0 for min_workers and max_workers from example configs to help prevent users inadvertantly setting max_workers=0 again.
  • Loading branch information
DmitriGekhtman authored May 10, 2022
1 parent 4c1f271 commit 29eebdf
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 6 deletions.
7 changes: 7 additions & 0 deletions python/ray/autoscaler/_private/local/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ def prepare_manual(config: Dict[str, Any]) -> Dict[str, Any]:
else:
node_type["max_workers"] = max_workers

if max_workers < num_ips:
cli_logger.warning(
f"The value of `max_workers` supplied ({max_workers}) is less"
f" than the number of available worker ips ({num_ips})."
f" At most {max_workers} Ray worker nodes will connect to the cluster."
)

return config


Expand Down
4 changes: 0 additions & 4 deletions python/ray/autoscaler/local/development-example.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
cluster_name: default
min_workers: 0
max_workers: 0
docker:
image: ""
container_name: ""
upscaling_speed: 1.0
idle_timeout_minutes: 5
provider:
type: local
head_ip: YOUR_HEAD_NODE_HOSTNAME
Expand Down
6 changes: 4 additions & 2 deletions python/ray/autoscaler/local/example-full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,14 @@ auth:
# The minimum number of workers nodes to launch in addition to the head
# node. This number should be >= 0.
# Typically, min_workers == max_workers == len(worker_ips).
min_workers: 0
# This field is optional.
min_workers: TYPICALLY_THE_NUMBER_OF_WORKER_IPS

# The maximum number of workers nodes to launch in addition to the head node.
# This takes precedence over min_workers.
# Typically, min_workers == max_workers == len(worker_ips).
max_workers: 0
# This field is optional.
max_workers: TYPICALLY_THE_NUMBER_OF_WORKER_IPS
# The default behavior for manually managed clusters is
# min_workers == max_workers == len(worker_ips),
# meaning that Ray is started on all available nodes of the cluster.
Expand Down
28 changes: 28 additions & 0 deletions python/ray/tests/test_autoscaler_yaml.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import jsonschema
import logging
import mock
import os
import sys
import tempfile
Expand Down Expand Up @@ -262,6 +263,33 @@ def testValidateLocal(self):
# Check that worker config numbers were clipped to 3.
assert prepared_config == expected_prepared

not_enough_workers_config = copy.deepcopy(base_config)

# Max workers is less than than the three available ips.
# The user is probably has probably made an error. Make sure we log a warning.
not_enough_workers_config["max_workers"] = 0
not_enough_workers_config["min_workers"] = 0
with mock.patch(
"ray.autoscaler._private.local.config.cli_logger.warning"
) as warning:
prepared_config = prepare_config(not_enough_workers_config)
warning.assert_called_with(
"The value of `max_workers` supplied (0) is less"
" than the number of available worker ips (3)."
" At most 0 Ray worker nodes will connect to the cluster."
)
expected_prepared = yaml.safe_load(EXPECTED_LOCAL_CONFIG_STR)
# We logged a warning.
# However, prepare_config does not repair the strange config setting:
expected_prepared["max_workers"] = 0
expected_prepared["available_node_types"]["local.cluster.node"][
"max_workers"
] = 0
expected_prepared["available_node_types"]["local.cluster.node"][
"min_workers"
] = 0
assert prepared_config == expected_prepared

def testValidateNetworkConfig(self):
web_yaml = (
"https://raw.githubusercontent.com/ray-project/ray/"
Expand Down

0 comments on commit 29eebdf

Please sign in to comment.