Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[serve] Prevent mixing single/multi-app config deployment #33340

Merged
merged 6 commits into from
Mar 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions dashboard/modules/serve/serve_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
CURRENT_VERSION,
VersionResponse,
)
from ray.exceptions import RayTaskError

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
Expand Down Expand Up @@ -228,9 +229,15 @@ def submit_config(self, config):
),
)

client.deploy_apps(config)

return Response()
try:
client.deploy_apps(config)
except RayTaskError as e:
zcin marked this conversation as resolved.
Show resolved Hide resolved
return Response(
status=400,
text=str(e),
)
else:
return Response()

async def get_serve_controller(self):
"""Gets the ServeController to the this cluster's Serve app.
Expand Down
100 changes: 100 additions & 0 deletions dashboard/modules/serve/tests/test_serve_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,106 @@ def applications_running():
print("Finished checking application details.")


@pytest.mark.skipif(sys.platform == "darwin", reason="Flaky on OSX.")
def test_deploy_single_then_multi(ray_start_stop):
world_import_path = "ray.serve.tests.test_config_files.world.DagNode"
pizza_import_path = "ray.serve.tests.test_config_files.pizza.serve_dag"
multi_app_config = {
"host": "127.0.0.1",
"port": 8000,
"applications": [
{
"name": "app1",
"route_prefix": "/app1",
"import_path": world_import_path,
},
{
"name": "app2",
"route_prefix": "/app2",
"import_path": pizza_import_path,
},
],
}
single_app_config = {
"host": "127.0.0.1",
"port": 8000,
"import_path": world_import_path,
}

def check_app():
wait_for_condition(
lambda: requests.post("http://localhost:8000/").text == "wonderful world",
timeout=15,
)

# Deploy single app config
deploy_and_check_config(single_app_config)
check_app()

# Deploying multi app config afterwards should fail
put_response = requests.put(GET_OR_PUT_URL_V2, json=multi_app_config, timeout=5)
assert put_response.status_code == 400
print(put_response.text)

# The original application should still be up and running
check_app()


@pytest.mark.skipif(sys.platform == "darwin", reason="Flaky on OSX.")
def test_deploy_multi_then_single(ray_start_stop):
world_import_path = "ray.serve.tests.test_config_files.world.DagNode"
pizza_import_path = "ray.serve.tests.test_config_files.pizza.serve_dag"
multi_app_config = {
"host": "127.0.0.1",
"port": 8000,
"applications": [
{
"name": "app1",
"route_prefix": "/app1",
"import_path": world_import_path,
},
{
"name": "app2",
"route_prefix": "/app2",
"import_path": pizza_import_path,
},
],
}
single_app_config = {
"host": "127.0.0.1",
"port": 8000,
"import_path": world_import_path,
}

def check_apps():
wait_for_condition(
lambda: requests.post("http://localhost:8000/app1").text
== "wonderful world",
timeout=15,
)
wait_for_condition(
lambda: requests.post("http://localhost:8000/app2", json=["ADD", 2]).json()
== "4 pizzas please!",
timeout=15,
)
wait_for_condition(
lambda: requests.post("http://localhost:8000/app2", json=["MUL", 2]).json()
== "6 pizzas please!",
timeout=15,
)

# Deploy multi app config
deploy_config_multi_app(multi_app_config)
check_apps()

# Deploying single app config afterwards should fail
put_response = requests.put(GET_OR_PUT_URL, json=single_app_config, timeout=5)
assert put_response.status_code == 400

# The original applications should still be up and running
check_apps()


@pytest.mark.skipif(sys.platform == "darwin", reason="Flaky on OSX.")
def test_serve_namespace(ray_start_stop):
"""
Expand Down
12 changes: 12 additions & 0 deletions python/ray/serve/_private/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,18 @@ def deploy_apps(
config: Union[ServeApplicationSchema, ServeDeploySchema],
_blocking: bool = False,
) -> None:
"""Starts a task on the controller that deploys application(s) from a config.

Args:
config: A single-application config (ServeApplicationSchema) or a
multi-application config (ServeDeploySchema)
_blocking: Whether to block until the application is running.

Raises:
RayTaskError: If the deploy task on the controller fails. This can be
because a single-app config was deployed after deploying a multi-app
config, or vice versa.
"""
ray.get(self._controller.deploy_apps.remote(config))

if _blocking:
Expand Down
6 changes: 6 additions & 0 deletions python/ray/serve/_private/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,3 +326,9 @@ def __eq__(self, other):
self._hash == other._hash,
]
)


class ServeDeployMode(str, Enum):
UNSET = "UNSET"
SINGLE_APP = "SINGLE_APP"
MULTI_APP = "MULTI_APP"
48 changes: 29 additions & 19 deletions python/ray/serve/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
NodeId,
RunningReplicaInfo,
StatusOverview,
ServeDeployMode,
)
from ray.serve.config import DeploymentConfig, HTTPOptions, ReplicaConfig
from ray.serve._private.constants import (
Expand All @@ -39,6 +40,7 @@
from ray.serve._private.http_state import HTTPState
from ray.serve._private.logging_utils import configure_component_logger
from ray.serve._private.long_poll import LongPollHost
from ray.serve.exceptions import RayServeException
from ray.serve.schema import (
ServeApplicationSchema,
ServeDeploySchema,
Expand Down Expand Up @@ -153,6 +155,9 @@ async def __init__(
self.deployment_state_manager
)

# Keep track of single-app vs multi-app
self.deploy_mode = ServeDeployMode.UNSET

run_background_task(self.run_control_loop())

self._recover_config_from_checkpoint()
Expand Down Expand Up @@ -490,26 +495,31 @@ def deploy_apps(
# deprecate such usage.
if isinstance(config, ServeApplicationSchema):
config = config.to_deploy_schema()
if self.deploy_mode == ServeDeployMode.MULTI_APP:
raise RayServeException(
"You are trying to deploy a single-application config, however "
"a multi-application config has been deployed to the current "
"Serve instance already. Mixing single-app and multi-app is not "
"allowed. Please either redeploy using the multi-application "
"config format `ServeDeploySchema`, or shutdown and restart Serve "
"to submit a single-app config of format `ServeApplicationSchema`. "
"If you are using the REST API, you can submit a single-app config "
"to the single-app API endpoint `/api/serve/deployments/`."
)
self.deploy_mode = ServeDeployMode.SINGLE_APP
else:
# TODO (zcin): ServeApplicationSchema still needs to have host and port
# fields to support single-app mode, but in multi-app mode the host and port
# fields at the top-level deploy config is used instead. Eventually, after
# migration, we should remove these fields from ServeApplicationSchema.
host, port = config.host, config.port
for app_config in config.applications:
app_config_dict = app_config.dict(exclude_unset=True)
if "host" in app_config_dict:
logger.info(
f"Host {app_config_dict['host']} is set in the config for "
f"application `{app_config.name}`. This will be ignored, as "
f"the host {host} from the top level deploy config is used."
)
if "port" in app_config:
logger.info(
f"Port {app_config_dict['port']} is set in the config for "
f"application `{app_config.name}`. This will be ignored, as "
f"the port {port} from the top level deploy config is used."
)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the warnings here, as in a separate PR I will add validation in the schema itself.

if self.deploy_mode == ServeDeployMode.SINGLE_APP:
raise RayServeException(
"You are trying to deploy a multi-application config, however "
"a single-application config has been deployed to the current "
"Serve instance already. Mixing single-app and multi-app is not "
"allowed. Please either redeploy using the single-application "
"config format `ServeApplicationSchema`, or shutdown and restart "
"Serve to submit a multi-app config of format `ServeDeploySchema`. "
"If you are using the REST API, you can submit a multi-app config "
"to the the multi-app API endpoint `/api/serve/applications/`."
)
self.deploy_mode = ServeDeployMode.MULTI_APP

if not deployment_time:
deployment_time = time.time()
Expand Down
Loading