diff --git a/dashboard/modules/serve/tests/test_serve_agent.py b/dashboard/modules/serve/tests/test_serve_agent.py index 665ac41b67d0..1ea921687c1d 100644 --- a/dashboard/modules/serve/tests/test_serve_agent.py +++ b/dashboard/modules/serve/tests/test_serve_agent.py @@ -460,7 +460,21 @@ def test_get_status(ray_start_stop): @pytest.mark.skipif(sys.platform == "darwin", reason="Flaky on OSX.") -def test_get_serve_instance_details(ray_start_stop): +@pytest.mark.parametrize( + "f_deployment_options", + [ + {"name": "f", "ray_actor_options": {"num_cpus": 0.2}}, + { + "name": "f", + "autoscaling_config": { + "min_replicas": 1, + "initial_replicas": 3, + "max_replicas": 10, + }, + }, + ], +) +def test_get_serve_instance_details(ray_start_stop, f_deployment_options): world_import_path = "ray.serve.tests.test_config_files.world.DagNode" fastapi_import_path = "ray.serve.tests.test_config_files.fastapi_deployment.node" config1 = { @@ -474,12 +488,7 @@ def test_get_serve_instance_details(ray_start_stop): "name": "app1", "route_prefix": "/app1", "import_path": world_import_path, - "deployments": [ - { - "name": "f", - "ray_actor_options": {"num_cpus": 0.2}, - }, - ], + "deployments": [f_deployment_options], }, { "name": "app2", @@ -553,7 +562,11 @@ def applications_running(): assert "route_prefix" not in deployment.deployment_config.dict( exclude_unset=True ) - assert len(deployment.replicas) == deployment.deployment_config.num_replicas + if isinstance(deployment.deployment_config.num_replicas, int): + assert ( + len(deployment.replicas) + == deployment.deployment_config.num_replicas + ) for replica in deployment.replicas: assert replica.state == ReplicaState.RUNNING diff --git a/python/ray/serve/schema.py b/python/ray/serve/schema.py index 4e753c8b6477..76ead035e00e 100644 --- a/python/ray/serve/schema.py +++ b/python/ray/serve/schema.py @@ -271,12 +271,10 @@ def _deployment_info_to_schema(name: str, info: DeploymentInfo) -> DeploymentSch codepath) """ - return DeploymentSchema( + schema = DeploymentSchema( name=name, - num_replicas=info.deployment_config.num_replicas, max_concurrent_queries=info.deployment_config.max_concurrent_queries, user_config=info.deployment_config.user_config, - autoscaling_config=info.deployment_config.autoscaling_config, graceful_shutdown_wait_loop_s=( info.deployment_config.graceful_shutdown_wait_loop_s ), @@ -287,6 +285,13 @@ def _deployment_info_to_schema(name: str, info: DeploymentInfo) -> DeploymentSch is_driver_deployment=info.is_driver_deployment, ) + if info.deployment_config.autoscaling_config is not None: + schema.autoscaling_config = info.deployment_config.autoscaling_config + else: + schema.num_replicas = info.deployment_config.num_replicas + + return schema + @PublicAPI(stability="beta") class ServeApplicationSchema(BaseModel, extra=Extra.forbid):