From f400c20246218d933b54d4f21f2117581116cdc5 Mon Sep 17 00:00:00 2001 From: shrekris-anyscale <92341594+shrekris-anyscale@users.noreply.github.com> Date: Wed, 13 Apr 2022 15:04:52 -0700 Subject: [PATCH] [serve] Use controller namespace when getting actors (#23896) Serve gets actors using the current Ray namespace. However, the Ray namespace and the controller namespace may not match when using the `_override_controller_namespace` argument in `serve.start()`. This change ensures that the `get_actor()` calls in `ActorReplicaWrapper` use the controller namespace. This also allows `num_replicas` to be scaled up and down properly when using `_override_controller_namespace`. --- python/ray/serve/deployment_state.py | 12 ++++++-- python/ray/serve/tests/test_standalone2.py | 33 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/python/ray/serve/deployment_state.py b/python/ray/serve/deployment_state.py index 3f123b189cc0..f263bcc1e32f 100644 --- a/python/ray/serve/deployment_state.py +++ b/python/ray/serve/deployment_state.py @@ -431,7 +431,9 @@ def graceful_stop(self) -> Duration: Returns the timeout after which to kill the actor. """ try: - handle = ray.get_actor(self._actor_name) + handle = ray.get_actor( + self._actor_name, namespace=self._controller_namespace + ) self._graceful_shutdown_ref = handle.prepare_for_shutdown.remote() except ValueError: pass @@ -441,7 +443,9 @@ def graceful_stop(self) -> Duration: def check_stopped(self) -> bool: """Check if the actor has exited.""" try: - handle = ray.get_actor(self._actor_name) + handle = ray.get_actor( + self._actor_name, namespace=self._controller_namespace + ) stopped = self._check_obj_ref_ready(self._graceful_shutdown_ref) if stopped: ray.kill(handle, no_restart=True) @@ -573,7 +577,9 @@ def check_health(self) -> bool: def force_stop(self): """Force the actor to exit without shutting down gracefully.""" try: - ray.kill(ray.get_actor(self._actor_name)) + ray.kill( + ray.get_actor(self._actor_name, namespace=self._controller_namespace) + ) except ValueError: pass diff --git a/python/ray/serve/tests/test_standalone2.py b/python/ray/serve/tests/test_standalone2.py index fe7a210d3506..4617d755423a 100644 --- a/python/ray/serve/tests/test_standalone2.py +++ b/python/ray/serve/tests/test_standalone2.py @@ -90,6 +90,39 @@ def f(*args): serve.shutdown() +@pytest.mark.parametrize("detached", [True, False]) +def test_update_num_replicas_with_overriden_namespace(shutdown_ray, detached): + """Test updating num_replicas with overriden namespace.""" + + ray_namespace = "ray_namespace" + controller_namespace = "controller_namespace" + + ray.init(namespace=ray_namespace) + serve.start(detached=detached, _override_controller_namespace=controller_namespace) + + @serve.deployment(num_replicas=2) + def f(*args): + return "got f" + + f.deploy() + + actors = ray.util.list_named_actors(all_namespaces=True) + + f.options(num_replicas=4).deploy() + updated_actors = ray.util.list_named_actors(all_namespaces=True) + + # Check that only 2 new replicas were created + assert len(updated_actors) == len(actors) + 2 + + f.options(num_replicas=1).deploy() + updated_actors = ray.util.list_named_actors(all_namespaces=True) + + # Check that all but 1 replica has spun down + assert len(updated_actors) == len(actors) - 1 + + serve.shutdown() + + @pytest.mark.parametrize("detached", [True, False]) def test_refresh_controller_after_death(shutdown_ray, detached): """Check if serve.start() refreshes the controller handle if it's dead."""