-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Serve] Track user-configured options in Serve deployments #28313
Changes from 39 commits
a38756a
23b7c9b
0523be8
0e245ee
4e62084
fc57c3e
d09e71b
48e291a
80a8a85
411bd26
fa5f4a6
2658d7e
dffaa2c
62a1258
e4a0d8b
5ffa7a7
a3f0e76
4ec73c7
8f84682
c1f83f9
190491e
a64a651
1fc6cbf
7ba9676
c5748e3
aa075b6
b7c34c0
ba0892e
9b807f6
90a15dd
af68e2e
696ae58
9a3cb6b
0463c2e
2fba994
67fca53
f1b9bb7
6f5502c
842ceee
23e7ce5
f871ef0
1bfb333
b41a42c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ | |
import traceback | ||
from enum import Enum | ||
from functools import wraps | ||
from typing import Dict, Iterable, List, Tuple | ||
from typing import Dict, Iterable, List, Tuple, TypeVar, Union | ||
|
||
import fastapi.encoders | ||
import numpy as np | ||
|
@@ -43,6 +43,11 @@ class DEFAULT(Enum): | |
VALUE = 1 | ||
|
||
|
||
# Type alias: objects that can be DEFAULT.VALUE have type Default[T] | ||
T = TypeVar("T") | ||
Default = Union[DEFAULT, T] | ||
Comment on lines
+46
to
+48
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is so cool, learned something new :D.
|
||
|
||
|
||
def parse_request_item(request_item): | ||
if len(request_item.args) == 1: | ||
arg = request_item.args[0] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,7 @@ | |
from ray.serve._private.logging_utils import LoggingContext | ||
from ray.serve._private.utils import ( | ||
DEFAULT, | ||
Default, | ||
ensure_serialization_context, | ||
in_interactive_shell, | ||
install_serve_encoders_to_fastapi, | ||
|
@@ -253,80 +254,81 @@ def deployment(func_or_class: Callable) -> Deployment: | |
|
||
@overload | ||
def deployment( | ||
name: Optional[str] = None, | ||
version: Optional[str] = None, | ||
num_replicas: Optional[int] = None, | ||
init_args: Optional[Tuple[Any]] = None, | ||
init_kwargs: Optional[Dict[Any, Any]] = None, | ||
route_prefix: Union[str, None, DEFAULT] = DEFAULT.VALUE, | ||
ray_actor_options: Optional[Dict] = None, | ||
user_config: Optional[Any] = None, | ||
max_concurrent_queries: Optional[int] = None, | ||
autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None, | ||
graceful_shutdown_wait_loop_s: Optional[float] = None, | ||
graceful_shutdown_timeout_s: Optional[float] = None, | ||
health_check_period_s: Optional[float] = None, | ||
health_check_timeout_s: Optional[float] = None, | ||
name: Default[str] = DEFAULT.VALUE, | ||
version: Default[str] = DEFAULT.VALUE, | ||
num_replicas: Default[int] = DEFAULT.VALUE, | ||
init_args: Default[Tuple[Any]] = DEFAULT.VALUE, | ||
init_kwargs: Default[Dict[Any, Any]] = DEFAULT.VALUE, | ||
route_prefix: Default[Union[str, None]] = DEFAULT.VALUE, | ||
ray_actor_options: Default[Dict] = DEFAULT.VALUE, | ||
user_config: Default[Any] = DEFAULT.VALUE, | ||
max_concurrent_queries: Default[int] = DEFAULT.VALUE, | ||
autoscaling_config: Default[Union[Dict, AutoscalingConfig]] = DEFAULT.VALUE, | ||
graceful_shutdown_wait_loop_s: Default[float] = DEFAULT.VALUE, | ||
graceful_shutdown_timeout_s: Default[float] = DEFAULT.VALUE, | ||
health_check_period_s: Default[float] = DEFAULT.VALUE, | ||
health_check_timeout_s: Default[float] = DEFAULT.VALUE, | ||
) -> Callable[[Callable], Deployment]: | ||
pass | ||
|
||
|
||
@PublicAPI(stability="beta") | ||
def deployment( | ||
_func_or_class: Optional[Callable] = None, | ||
name: Optional[str] = None, | ||
version: Optional[str] = None, | ||
num_replicas: Optional[int] = None, | ||
init_args: Optional[Tuple[Any]] = None, | ||
init_kwargs: Optional[Dict[Any, Any]] = None, | ||
route_prefix: Union[str, None, DEFAULT] = DEFAULT.VALUE, | ||
ray_actor_options: Optional[Dict] = None, | ||
user_config: Optional[Any] = None, | ||
max_concurrent_queries: Optional[int] = None, | ||
autoscaling_config: Optional[Union[Dict, AutoscalingConfig]] = None, | ||
graceful_shutdown_wait_loop_s: Optional[float] = None, | ||
graceful_shutdown_timeout_s: Optional[float] = None, | ||
health_check_period_s: Optional[float] = None, | ||
health_check_timeout_s: Optional[float] = None, | ||
name: Default[str] = DEFAULT.VALUE, | ||
version: Default[str] = DEFAULT.VALUE, | ||
num_replicas: Default[Optional[int]] = DEFAULT.VALUE, | ||
init_args: Default[Tuple[Any]] = DEFAULT.VALUE, | ||
init_kwargs: Default[Dict[Any, Any]] = DEFAULT.VALUE, | ||
route_prefix: Default[Union[str, None]] = DEFAULT.VALUE, | ||
ray_actor_options: Default[Dict] = DEFAULT.VALUE, | ||
user_config: Default[Optional[Any]] = DEFAULT.VALUE, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please correct me if i am wrong, we are having There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It’s not quite setting every attribute to 1 here. I’m using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
max_concurrent_queries: Default[int] = DEFAULT.VALUE, | ||
autoscaling_config: Default[Union[Dict, AutoscalingConfig, None]] = DEFAULT.VALUE, | ||
graceful_shutdown_wait_loop_s: Default[float] = DEFAULT.VALUE, | ||
graceful_shutdown_timeout_s: Default[float] = DEFAULT.VALUE, | ||
health_check_period_s: Default[float] = DEFAULT.VALUE, | ||
health_check_timeout_s: Default[float] = DEFAULT.VALUE, | ||
) -> Callable[[Callable], Deployment]: | ||
"""Define a Serve deployment. | ||
|
||
Args: | ||
name (Optional[str]): Globally-unique name identifying this deployment. | ||
If not provided, the name of the class or function will be used. | ||
version [DEPRECATED] (Optional[str]): Version of the deployment. This is used to | ||
indicate a code change for the deployment; when it is re-deployed | ||
with a version change, a rolling update of the replicas will be | ||
performed. If not provided, every deployment will be treated as a | ||
new version. | ||
num_replicas (Optional[int]): The number of processes to start up that | ||
name (Default[str]): Globally-unique name identifying this | ||
deployment. If not provided, the name of the class or function will | ||
be used. | ||
version [DEPRECATED] (Default[str]): Version of the deployment. | ||
This is used to indicate a code change for the deployment; when it | ||
is re-deployed with a version change, a rolling update of the | ||
replicas will be performed. If not provided, every deployment will | ||
be treated as a new version. | ||
num_replicas (Default[Optional[int]]): The number of processes to start up that | ||
will handle requests to this deployment. Defaults to 1. | ||
init_args (Optional[Tuple]): Positional args to be passed to the class | ||
constructor when starting up deployment replicas. These can also be | ||
passed when you call `.deploy()` on the returned Deployment. | ||
init_kwargs (Optional[Dict]): Keyword args to be passed to the class | ||
constructor when starting up deployment replicas. These can also be | ||
passed when you call `.deploy()` on the returned Deployment. | ||
route_prefix (Optional[str]): Requests to paths under this HTTP path | ||
prefix will be routed to this deployment. Defaults to '/{name}'. | ||
When set to 'None', no HTTP endpoint will be created. | ||
init_args (Default[Tuple[Any]]): Positional args to be passed to the | ||
class constructor when starting up deployment replicas. These can | ||
also be passed when you call `.deploy()` on the returned Deployment. | ||
init_kwargs (Default[Dict[Any, Any]]): Keyword args to be passed to the | ||
class constructor when starting up deployment replicas. These can | ||
also be passed when you call `.deploy()` on the returned Deployment. | ||
route_prefix (Default[Union[str, None]]): Requests to paths under this | ||
HTTP path prefix will be routed to this deployment. Defaults to | ||
'/{name}'. When set to 'None', no HTTP endpoint will be created. | ||
Routing is done based on longest-prefix match, so if you have | ||
deployment A with a prefix of '/a' and deployment B with a prefix | ||
of '/a/b', requests to '/a', '/a/', and '/a/c' go to A and requests | ||
to '/a/b', '/a/b/', and '/a/b/c' go to B. Routes must not end with | ||
a '/' unless they're the root (just '/'), which acts as a | ||
catch-all. | ||
ray_actor_options: Options to be passed to the Ray actor | ||
constructor such as resource requirements. Valid options are | ||
ray_actor_options (Default[Dict]): Options to be passed to the Ray | ||
actor constructor such as resource requirements. Valid options are | ||
`accelerator_type`, `memory`, `num_cpus`, `num_gpus`, | ||
`object_store_memory`, `resources`, and `runtime_env`. | ||
user_config (Optional[Any]): Config to pass to the | ||
user_config (Default[Optional[Any]]): Config to pass to the | ||
reconfigure method of the deployment. This can be updated | ||
dynamically without changing the version of the deployment and | ||
restarting its replicas. The user_config must be json-serializable | ||
to keep track of updates, so it must only contain json-serializable | ||
types, or json-serializable types nested in lists and dictionaries. | ||
max_concurrent_queries (Optional[int]): The maximum number of queries | ||
max_concurrent_queries (Default[int]): The maximum number of queries | ||
that will be sent to a replica of this deployment without receiving | ||
a response. Defaults to 100. | ||
|
||
|
@@ -344,26 +346,35 @@ def deployment( | |
Deployment | ||
""" | ||
|
||
# Create list of all user-configured options from keyword args | ||
user_configured_option_names = [ | ||
option | ||
for option, value in locals().items() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe add a comment, we should not have any local variable because of using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah good idea. We can still have local variables, but they should be defined after this list. Creating that list should be the first thing that happens in the function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, the order is difficult to guarantee and audit, better to have a way to predefine user-configured attributes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think unit tests should be able to catch any issues with the ordering since any local variables that are defined before this list will show up as a user-configured attribute and cause the unit tests to fail. What do you mean by predefine user-configured attributes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Deployment can provide the user_configured_option_names attributes, you can filter the attributes from |
||
if option != "_func_or_class" and value is not DEFAULT.VALUE | ||
] | ||
|
||
# Num of replicas should not be 0. | ||
# TODO(Sihan) seperate num_replicas attribute from internal and api | ||
if num_replicas == 0: | ||
raise ValueError("num_replicas is expected to larger than 0") | ||
|
||
if num_replicas is not None and autoscaling_config is not None: | ||
if num_replicas not in [DEFAULT.VALUE, None] and autoscaling_config not in [ | ||
DEFAULT.VALUE, | ||
None, | ||
]: | ||
raise ValueError( | ||
"Manually setting num_replicas is not allowed when " | ||
"autoscaling_config is provided." | ||
) | ||
|
||
if version is not None: | ||
if version is not DEFAULT.VALUE: | ||
logger.warning( | ||
"DeprecationWarning: `version` in `@serve.deployment` has been deprecated. " | ||
"Explicitly specifying version will raise an error in the future!" | ||
) | ||
|
||
config = DeploymentConfig.from_default( | ||
ignore_none=True, | ||
num_replicas=num_replicas, | ||
num_replicas=num_replicas if num_replicas is not None else 1, | ||
user_config=user_config, | ||
max_concurrent_queries=max_concurrent_queries, | ||
autoscaling_config=autoscaling_config, | ||
|
@@ -372,17 +383,20 @@ def deployment( | |
health_check_period_s=health_check_period_s, | ||
health_check_timeout_s=health_check_timeout_s, | ||
) | ||
config.user_configured_option_names = set(user_configured_option_names) | ||
|
||
def decorator(_func_or_class): | ||
return Deployment( | ||
_func_or_class, | ||
name if name is not None else _func_or_class.__name__, | ||
name if name is not DEFAULT.VALUE else _func_or_class.__name__, | ||
config, | ||
version=version, | ||
init_args=init_args, | ||
init_kwargs=init_kwargs, | ||
version=(version if version is not DEFAULT.VALUE else None), | ||
init_args=(init_args if init_args is not DEFAULT.VALUE else None), | ||
init_kwargs=(init_kwargs if init_kwargs is not DEFAULT.VALUE else None), | ||
route_prefix=route_prefix, | ||
ray_actor_options=ray_actor_options, | ||
ray_actor_options=( | ||
ray_actor_options if ray_actor_options is not DEFAULT.VALUE else None | ||
), | ||
_internal=True, | ||
) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add a comment on why do we need this flag
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good, I added a comment.