From 9e70bff92334641998f8b8235409a3bcd91e3fed Mon Sep 17 00:00:00 2001 From: Ed Oakes Date: Sun, 31 Oct 2021 10:38:41 -0500 Subject: [PATCH 1/2] BackendConfig -> DeploymentConfig --- .../java/io/ray/serve/DeploymentInfo.java | 10 +-- .../java/io/ray/serve/RayServeReplica.java | 14 ++--- .../io/ray/serve/RayServeWrappedReplica.java | 18 +++--- .../main/java/io/ray/serve/ReplicaSet.java | 6 +- .../src/main/java/io/ray/serve/Router.java | 2 +- .../io/ray/serve/poll/LongPollClient.java | 2 +- .../io/ray/serve/util/ServeProtoUtil.java | 39 ++++++------ .../java/io/ray/serve/ProxyActorTest.java | 4 +- .../java/io/ray/serve/RayServeHandleTest.java | 12 ++-- .../io/ray/serve/RayServeReplicaTest.java | 12 ++-- .../java/io/ray/serve/ReplicaSetTest.java | 14 ++--- .../test/java/io/ray/serve/RouterTest.java | 12 ++-- .../io/ray/serve/poll/LongPollClientTest.java | 16 ++--- python/ray/serve/api.py | 60 +++++++++--------- python/ray/serve/backend_state.py | 42 +++++++------ python/ray/serve/common.py | 6 +- python/ray/serve/config.py | 61 +++++++++---------- python/ray/serve/controller.py | 30 ++++----- python/ray/serve/replica.py | 43 ++++++------- python/ray/serve/tests/test_backend_state.py | 9 +-- python/ray/serve/tests/test_config.py | 30 ++++----- src/ray/protobuf/serve.proto | 22 +++---- 22 files changed, 236 insertions(+), 228 deletions(-) diff --git a/java/serve/src/main/java/io/ray/serve/DeploymentInfo.java b/java/serve/src/main/java/io/ray/serve/DeploymentInfo.java index 0a6117b21133..943be34514c8 100644 --- a/java/serve/src/main/java/io/ray/serve/DeploymentInfo.java +++ b/java/serve/src/main/java/io/ray/serve/DeploymentInfo.java @@ -6,18 +6,18 @@ public class DeploymentInfo implements Serializable { private static final long serialVersionUID = -4198364411759931955L; - private byte[] backendConfig; + private byte[] deploymentConfig; private ReplicaConfig replicaConfig; private byte[] deploymentVersion; - public byte[] getBackendConfig() { - return backendConfig; + public byte[] getDeploymentConfig() { + return deploymentConfig; } - public void setBackendConfig(byte[] backendConfig) { - this.backendConfig = backendConfig; + public void setDeploymentConfig(byte[] deploymentConfig) { + this.deploymentConfig = deploymentConfig; } public ReplicaConfig getReplicaConfig() { diff --git a/java/serve/src/main/java/io/ray/serve/RayServeReplica.java b/java/serve/src/main/java/io/ray/serve/RayServeReplica.java index c8445ca94238..7151f175fd65 100644 --- a/java/serve/src/main/java/io/ray/serve/RayServeReplica.java +++ b/java/serve/src/main/java/io/ray/serve/RayServeReplica.java @@ -9,7 +9,7 @@ import io.ray.runtime.metric.Metrics; import io.ray.runtime.serializer.MessagePackSerializer; import io.ray.serve.api.Serve; -import io.ray.serve.generated.BackendConfig; +import io.ray.serve.generated.DeploymentConfig; import io.ray.serve.generated.DeploymentVersion; import io.ray.serve.generated.RequestWrapper; import io.ray.serve.poll.KeyListener; @@ -34,7 +34,7 @@ public class RayServeReplica { private String replicaTag; - private BackendConfig config; + private DeploymentConfig config; private AtomicInteger numOngoingRequests = new AtomicInteger(); @@ -58,19 +58,19 @@ public class RayServeReplica { public RayServeReplica( Object callable, - BackendConfig backendConfig, + DeploymentConfig deploymentConfig, DeploymentVersion version, BaseActorHandle actorHandle) { this.backendTag = Serve.getReplicaContext().getBackendTag(); this.replicaTag = Serve.getReplicaContext().getReplicaTag(); this.callable = callable; - this.config = backendConfig; + this.config = deploymentConfig; this.version = version; Map keyListeners = new HashMap<>(); keyListeners.put( new KeyType(LongPollNamespace.BACKEND_CONFIGS, backendTag), - newConfig -> updateBackendConfigs(newConfig)); + newConfig -> updateDeploymentConfigs(newConfig)); this.longPollClient = new LongPollClient(actorHandle, keyListeners); this.longPollClient.start(); registerMetrics(); @@ -319,8 +319,8 @@ public DeploymentVersion reconfigure(Object userConfig) { * * @param newConfig the new configuration of backend */ - private void updateBackendConfigs(Object newConfig) { - config = (BackendConfig) newConfig; + private void updateDeploymentConfigs(Object newConfig) { + config = (DeploymentConfig) newConfig; } public DeploymentVersion getVersion() { diff --git a/java/serve/src/main/java/io/ray/serve/RayServeWrappedReplica.java b/java/serve/src/main/java/io/ray/serve/RayServeWrappedReplica.java index e3228c9c0aaf..545feb81da94 100644 --- a/java/serve/src/main/java/io/ray/serve/RayServeWrappedReplica.java +++ b/java/serve/src/main/java/io/ray/serve/RayServeWrappedReplica.java @@ -6,7 +6,7 @@ import io.ray.api.Ray; import io.ray.runtime.serializer.MessagePackSerializer; import io.ray.serve.api.Serve; -import io.ray.serve.generated.BackendConfig; +import io.ray.serve.generated.DeploymentConfig; import io.ray.serve.generated.DeploymentVersion; import io.ray.serve.generated.RequestMetadata; import io.ray.serve.util.ReflectUtil; @@ -27,17 +27,17 @@ public RayServeWrappedReplica( String replicaTag, String backendDef, byte[] initArgsbytes, - byte[] backendConfigBytes, + byte[] deploymentConfigBytes, byte[] deploymentVersionBytes, String controllerName) throws ClassNotFoundException, NoSuchMethodException, InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, IOException { - // Parse BackendConfig. - BackendConfig backendConfig = ServeProtoUtil.parseBackendConfig(backendConfigBytes); + // Parse DeploymentConfig. + DeploymentConfig deploymentConfig = ServeProtoUtil.parseDeploymentConfig(deploymentConfigBytes); // Parse init args. - Object[] initArgs = parseInitArgs(initArgsbytes, backendConfig); + Object[] initArgs = parseInitArgs(initArgsbytes, deploymentConfig); // Instantiate the object defined by backendDef. Class backendClass = Class.forName(backendDef); @@ -57,7 +57,7 @@ public RayServeWrappedReplica( backend = new RayServeReplica( callable, - backendConfig, + deploymentConfig, ServeProtoUtil.parseDeploymentVersion(deploymentVersionBytes), optional.get()); } @@ -71,19 +71,19 @@ public RayServeWrappedReplica( replicaTag, deploymentInfo.getReplicaConfig().getBackendDef(), deploymentInfo.getReplicaConfig().getInitArgs(), - deploymentInfo.getBackendConfig(), + deploymentInfo.getDeploymentConfig(), deploymentInfo.getDeploymentVersion(), controllerName); } - private Object[] parseInitArgs(byte[] initArgsbytes, BackendConfig backendConfig) + private Object[] parseInitArgs(byte[] initArgsbytes, DeploymentConfig deploymentConfig) throws IOException { if (initArgsbytes == null || initArgsbytes.length == 0) { return new Object[0]; } - if (!backendConfig.getIsCrossLanguage()) { + if (!deploymentConfig.getIsCrossLanguage()) { // If the construction request is from Java API, deserialize initArgsbytes to Object[] // directly. return MessagePackSerializer.decode(initArgsbytes, Object[].class); diff --git a/java/serve/src/main/java/io/ray/serve/ReplicaSet.java b/java/serve/src/main/java/io/ray/serve/ReplicaSet.java index 1c7e757bba44..0b5646f4bd5c 100644 --- a/java/serve/src/main/java/io/ray/serve/ReplicaSet.java +++ b/java/serve/src/main/java/io/ray/serve/ReplicaSet.java @@ -9,7 +9,7 @@ import io.ray.runtime.metric.Metrics; import io.ray.runtime.metric.TagKey; import io.ray.serve.generated.ActorSet; -import io.ray.serve.generated.BackendConfig; +import io.ray.serve.generated.DeploymentConfig; import io.ray.serve.util.CollectionUtil; import java.util.ArrayList; import java.util.HashSet; @@ -48,8 +48,8 @@ public ReplicaSet(String backendTag) { .register()); } - public void setMaxConcurrentQueries(Object backendConfig) { - int newValue = ((BackendConfig) backendConfig).getMaxConcurrentQueries(); + public void setMaxConcurrentQueries(Object deploymentConfig) { + int newValue = ((DeploymentConfig) deploymentConfig).getMaxConcurrentQueries(); if (newValue != this.maxConcurrentQueries) { this.maxConcurrentQueries = newValue; LOGGER.info("ReplicaSet: changing max_concurrent_queries to {}", newValue); diff --git a/java/serve/src/main/java/io/ray/serve/Router.java b/java/serve/src/main/java/io/ray/serve/Router.java index 5ef339d77767..0b744c835a09 100644 --- a/java/serve/src/main/java/io/ray/serve/Router.java +++ b/java/serve/src/main/java/io/ray/serve/Router.java @@ -38,7 +38,7 @@ public Router(BaseActorHandle controllerHandle, String backendTag) { Map keyListeners = new HashMap<>(); keyListeners.put( new KeyType(LongPollNamespace.BACKEND_CONFIGS, backendTag), - backendConfig -> replicaSet.setMaxConcurrentQueries(backendConfig)); // cross language + deploymentConfig -> replicaSet.setMaxConcurrentQueries(deploymentConfig)); // cross language keyListeners.put( new KeyType(LongPollNamespace.REPLICA_HANDLES, backendTag), workerReplicas -> replicaSet.updateWorkerReplicas(workerReplicas)); // cross language diff --git a/java/serve/src/main/java/io/ray/serve/poll/LongPollClient.java b/java/serve/src/main/java/io/ray/serve/poll/LongPollClient.java index 308391254e10..54d107b717aa 100644 --- a/java/serve/src/main/java/io/ray/serve/poll/LongPollClient.java +++ b/java/serve/src/main/java/io/ray/serve/poll/LongPollClient.java @@ -47,7 +47,7 @@ public class LongPollClient { static { DESERIALIZERS.put( - LongPollNamespace.BACKEND_CONFIGS, body -> ServeProtoUtil.parseBackendConfig(body)); + LongPollNamespace.BACKEND_CONFIGS, body -> ServeProtoUtil.parseDeploymentConfig(body)); DESERIALIZERS.put( LongPollNamespace.REPLICA_HANDLES, body -> ServeProtoUtil.parseEndpointSet(body)); DESERIALIZERS.put( diff --git a/java/serve/src/main/java/io/ray/serve/util/ServeProtoUtil.java b/java/serve/src/main/java/io/ray/serve/util/ServeProtoUtil.java index eeee061a9fb8..edcc8399e101 100644 --- a/java/serve/src/main/java/io/ray/serve/util/ServeProtoUtil.java +++ b/java/serve/src/main/java/io/ray/serve/util/ServeProtoUtil.java @@ -7,8 +7,8 @@ import io.ray.runtime.serializer.MessagePackSerializer; import io.ray.serve.Constants; import io.ray.serve.RayServeException; -import io.ray.serve.generated.BackendConfig; -import io.ray.serve.generated.BackendLanguage; +import io.ray.serve.generated.DeploymentConfig; +import io.ray.serve.generated.DeploymentLanguage; import io.ray.serve.generated.DeploymentVersion; import io.ray.serve.generated.EndpointInfo; import io.ray.serve.generated.EndpointSet; @@ -25,23 +25,23 @@ public class ServeProtoUtil { private static final Gson GSON = new Gson(); - public static BackendConfig parseBackendConfig(byte[] backendConfigBytes) { + public static DeploymentConfig parseDeploymentConfig(byte[] deploymentConfigBytes) { - // Get a builder from BackendConfig(bytes) or create a new one. - BackendConfig.Builder builder = null; - if (backendConfigBytes == null) { - builder = BackendConfig.newBuilder(); + // Get a builder from DeploymentConfig(bytes) or create a new one. + DeploymentConfig.Builder builder = null; + if (deploymentConfigBytes == null) { + builder = DeploymentConfig.newBuilder(); } else { - BackendConfig backendConfig = null; + DeploymentConfig deploymentConfig = null; try { - backendConfig = BackendConfig.parseFrom(backendConfigBytes); + deploymentConfig = DeploymentConfig.parseFrom(deploymentConfigBytes); } catch (InvalidProtocolBufferException e) { - throw new RayServeException("Failed to parse BackendConfig from protobuf bytes.", e); + throw new RayServeException("Failed to parse DeploymentConfig from protobuf bytes.", e); } - if (backendConfig == null) { - builder = BackendConfig.newBuilder(); + if (deploymentConfig == null) { + builder = DeploymentConfig.newBuilder(); } else { - builder = BackendConfig.newBuilder(backendConfig); + builder = DeploymentConfig.newBuilder(deploymentConfig); } } @@ -64,22 +64,23 @@ public static BackendConfig parseBackendConfig(byte[] backendConfigBytes) { builder.setGracefulShutdownTimeoutS(20); } - if (builder.getBackendLanguage() == BackendLanguage.UNRECOGNIZED) { + if (builder.getDeploymentLanguage() == DeploymentLanguage.UNRECOGNIZED) { throw new RayServeException( LogUtil.format( "Unrecognized backend language {}. Backend language must be in {}.", - builder.getBackendLanguageValue(), - Lists.newArrayList(BackendLanguage.values()))); + builder.getDeploymentLanguageValue(), + Lists.newArrayList(DeploymentLanguage.values()))); } return builder.build(); } - public static Object parseUserConfig(BackendConfig backendConfig) { - if (backendConfig.getUserConfig() == null || backendConfig.getUserConfig().size() == 0) { + public static Object parseUserConfig(DeploymentConfig deploymentConfig) { + if (deploymentConfig.getUserConfig() == null || deploymentConfig.getUserConfig().size() == 0) { return null; } - return MessagePackSerializer.decode(backendConfig.getUserConfig().toByteArray(), Object.class); + return MessagePackSerializer.decode( + deploymentConfig.getUserConfig().toByteArray(), Object.class); } public static RequestMetadata parseRequestMetadata(byte[] requestMetadataBytes) diff --git a/java/serve/src/test/java/io/ray/serve/ProxyActorTest.java b/java/serve/src/test/java/io/ray/serve/ProxyActorTest.java index 632e2805b962..69f7e0ed987d 100644 --- a/java/serve/src/test/java/io/ray/serve/ProxyActorTest.java +++ b/java/serve/src/test/java/io/ray/serve/ProxyActorTest.java @@ -5,7 +5,7 @@ import io.ray.runtime.serializer.MessagePackSerializer; import io.ray.serve.api.Serve; import io.ray.serve.generated.ActorSet; -import io.ray.serve.generated.BackendConfig; +import io.ray.serve.generated.DeploymentConfig; import io.ray.serve.generated.DeploymentVersion; import io.ray.serve.generated.EndpointInfo; import io.ray.serve.util.CommonUtil; @@ -51,7 +51,7 @@ public void test() throws IOException { // Replica DeploymentInfo deploymentInfo = new DeploymentInfo(); - deploymentInfo.setBackendConfig(BackendConfig.newBuilder().build().toByteArray()); + deploymentInfo.setDeploymentConfig(DeploymentConfig.newBuilder().build().toByteArray()); deploymentInfo.setDeploymentVersion( DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray()); deploymentInfo.setReplicaConfig( diff --git a/java/serve/src/test/java/io/ray/serve/RayServeHandleTest.java b/java/serve/src/test/java/io/ray/serve/RayServeHandleTest.java index ae2a6e22dddd..243ea1d5657b 100644 --- a/java/serve/src/test/java/io/ray/serve/RayServeHandleTest.java +++ b/java/serve/src/test/java/io/ray/serve/RayServeHandleTest.java @@ -5,8 +5,8 @@ import io.ray.api.Ray; import io.ray.runtime.serializer.MessagePackSerializer; import io.ray.serve.generated.ActorSet; -import io.ray.serve.generated.BackendConfig; -import io.ray.serve.generated.BackendLanguage; +import io.ray.serve.generated.DeploymentConfig; +import io.ray.serve.generated.DeploymentLanguage; import io.ray.serve.generated.DeploymentVersion; import java.util.HashMap; import org.testng.Assert; @@ -31,15 +31,15 @@ public void test() { Ray.actor(DummyServeController::new).setName(controllerName).remote(); // Replica - BackendConfig.Builder backendConfigBuilder = BackendConfig.newBuilder(); - backendConfigBuilder.setBackendLanguage(BackendLanguage.JAVA); - byte[] backendConfigBytes = backendConfigBuilder.build().toByteArray(); + DeploymentConfig.Builder deploymentConfigBuilder = DeploymentConfig.newBuilder(); + deploymentConfigBuilder.setDeploymentLanguage(DeploymentLanguage.JAVA); + byte[] deploymentConfigBytes = deploymentConfigBuilder.build().toByteArray(); Object[] initArgs = new Object[] {backendTag, replicaTag, controllerName, new Object()}; byte[] initArgsBytes = MessagePackSerializer.encode(initArgs).getLeft(); DeploymentInfo deploymentInfo = new DeploymentInfo(); - deploymentInfo.setBackendConfig(backendConfigBytes); + deploymentInfo.setDeploymentConfig(deploymentConfigBytes); deploymentInfo.setDeploymentVersion( DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray()); deploymentInfo.setReplicaConfig( diff --git a/java/serve/src/test/java/io/ray/serve/RayServeReplicaTest.java b/java/serve/src/test/java/io/ray/serve/RayServeReplicaTest.java index 7c0e61b95221..7e9d5d5e5577 100644 --- a/java/serve/src/test/java/io/ray/serve/RayServeReplicaTest.java +++ b/java/serve/src/test/java/io/ray/serve/RayServeReplicaTest.java @@ -4,8 +4,8 @@ import io.ray.api.ObjectRef; import io.ray.api.Ray; import io.ray.runtime.serializer.MessagePackSerializer; -import io.ray.serve.generated.BackendConfig; -import io.ray.serve.generated.BackendLanguage; +import io.ray.serve.generated.DeploymentConfig; +import io.ray.serve.generated.DeploymentLanguage; import io.ray.serve.generated.DeploymentVersion; import io.ray.serve.generated.RequestMetadata; import io.ray.serve.generated.RequestWrapper; @@ -32,14 +32,14 @@ public void test() throws IOException { ActorHandle controllerHandle = Ray.actor(DummyServeController::new).setName(controllerName).remote(); - BackendConfig.Builder backendConfigBuilder = BackendConfig.newBuilder(); - backendConfigBuilder.setBackendLanguage(BackendLanguage.JAVA); - byte[] backendConfigBytes = backendConfigBuilder.build().toByteArray(); + DeploymentConfig.Builder deploymentConfigBuilder = DeploymentConfig.newBuilder(); + deploymentConfigBuilder.setDeploymentLanguage(DeploymentLanguage.JAVA); + byte[] deploymentConfigBytes = deploymentConfigBuilder.build().toByteArray(); Object[] initArgs = new Object[] {backendTag, replicaTag, controllerName, new Object()}; byte[] initArgsBytes = MessagePackSerializer.encode(initArgs).getLeft(); DeploymentInfo deploymentInfo = new DeploymentInfo(); - deploymentInfo.setBackendConfig(backendConfigBytes); + deploymentInfo.setDeploymentConfig(deploymentConfigBytes); deploymentInfo.setDeploymentVersion( DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray()); deploymentInfo.setReplicaConfig( diff --git a/java/serve/src/test/java/io/ray/serve/ReplicaSetTest.java b/java/serve/src/test/java/io/ray/serve/ReplicaSetTest.java index 972d357dd114..d70edcec3082 100644 --- a/java/serve/src/test/java/io/ray/serve/ReplicaSetTest.java +++ b/java/serve/src/test/java/io/ray/serve/ReplicaSetTest.java @@ -5,8 +5,8 @@ import io.ray.api.Ray; import io.ray.runtime.serializer.MessagePackSerializer; import io.ray.serve.generated.ActorSet; -import io.ray.serve.generated.BackendConfig; -import io.ray.serve.generated.BackendLanguage; +import io.ray.serve.generated.DeploymentConfig; +import io.ray.serve.generated.DeploymentLanguage; import io.ray.serve.generated.DeploymentVersion; import io.ray.serve.generated.RequestMetadata; import java.util.HashMap; @@ -23,7 +23,7 @@ public class ReplicaSetTest { @Test public void setMaxConcurrentQueriesTest() { ReplicaSet replicaSet = new ReplicaSet(backendTag); - BackendConfig.Builder builder = BackendConfig.newBuilder(); + DeploymentConfig.Builder builder = DeploymentConfig.newBuilder(); builder.setMaxConcurrentQueries(200); replicaSet.setMaxConcurrentQueries(builder.build()); @@ -58,15 +58,15 @@ public void assignReplicaTest() { Ray.actor(DummyServeController::new).setName(controllerName).remote(); // Replica - BackendConfig.Builder backendConfigBuilder = BackendConfig.newBuilder(); - backendConfigBuilder.setBackendLanguage(BackendLanguage.JAVA); - byte[] backendConfigBytes = backendConfigBuilder.build().toByteArray(); + DeploymentConfig.Builder deploymentConfigBuilder = DeploymentConfig.newBuilder(); + deploymentConfigBuilder.setDeploymentLanguage(DeploymentLanguage.JAVA); + byte[] deploymentConfigBytes = deploymentConfigBuilder.build().toByteArray(); Object[] initArgs = new Object[] {backendTag, replicaTag, controllerName, new Object()}; byte[] initArgsBytes = MessagePackSerializer.encode(initArgs).getLeft(); DeploymentInfo deploymentInfo = new DeploymentInfo(); - deploymentInfo.setBackendConfig(backendConfigBytes); + deploymentInfo.setDeploymentConfig(deploymentConfigBytes); deploymentInfo.setDeploymentVersion( DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray()); deploymentInfo.setReplicaConfig( diff --git a/java/serve/src/test/java/io/ray/serve/RouterTest.java b/java/serve/src/test/java/io/ray/serve/RouterTest.java index f52f8822eaba..03ee6da4a6ef 100644 --- a/java/serve/src/test/java/io/ray/serve/RouterTest.java +++ b/java/serve/src/test/java/io/ray/serve/RouterTest.java @@ -5,8 +5,8 @@ import io.ray.api.Ray; import io.ray.runtime.serializer.MessagePackSerializer; import io.ray.serve.generated.ActorSet; -import io.ray.serve.generated.BackendConfig; -import io.ray.serve.generated.BackendLanguage; +import io.ray.serve.generated.DeploymentConfig; +import io.ray.serve.generated.DeploymentLanguage; import io.ray.serve.generated.DeploymentVersion; import io.ray.serve.generated.RequestMetadata; import java.util.HashMap; @@ -33,15 +33,15 @@ public void test() { Ray.actor(DummyServeController::new).setName(controllerName).remote(); // Replica - BackendConfig.Builder backendConfigBuilder = BackendConfig.newBuilder(); - backendConfigBuilder.setBackendLanguage(BackendLanguage.JAVA); - byte[] backendConfigBytes = backendConfigBuilder.build().toByteArray(); + DeploymentConfig.Builder deploymentConfigBuilder = DeploymentConfig.newBuilder(); + deploymentConfigBuilder.setDeploymentLanguage(DeploymentLanguage.JAVA); + byte[] deploymentConfigBytes = deploymentConfigBuilder.build().toByteArray(); Object[] initArgs = new Object[] {backendTag, replicaTag, controllerName, new Object()}; byte[] initArgsBytes = MessagePackSerializer.encode(initArgs).getLeft(); DeploymentInfo deploymentInfo = new DeploymentInfo(); - deploymentInfo.setBackendConfig(backendConfigBytes); + deploymentInfo.setDeploymentConfig(deploymentConfigBytes); deploymentInfo.setDeploymentVersion( DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray()); deploymentInfo.setReplicaConfig( diff --git a/java/serve/src/test/java/io/ray/serve/poll/LongPollClientTest.java b/java/serve/src/test/java/io/ray/serve/poll/LongPollClientTest.java index 7ee254806fad..6bda0c277907 100644 --- a/java/serve/src/test/java/io/ray/serve/poll/LongPollClientTest.java +++ b/java/serve/src/test/java/io/ray/serve/poll/LongPollClientTest.java @@ -1,7 +1,7 @@ package io.ray.serve.poll; import com.google.protobuf.ByteString; -import io.ray.serve.generated.BackendConfig; +import io.ray.serve.generated.DeploymentConfig; import io.ray.serve.generated.UpdatedObject; import java.util.HashMap; import java.util.Map; @@ -19,18 +19,18 @@ public void test() throws Throwable { KeyType keyType = new KeyType(LongPollNamespace.BACKEND_CONFIGS, "backendTag"); Map keyListeners = new HashMap<>(); keyListeners.put( - keyType, (object) -> a[0] = String.valueOf(((BackendConfig) object).getNumReplicas())); + keyType, (object) -> a[0] = String.valueOf(((DeploymentConfig) object).getNumReplicas())); // Initialize LongPollClient. LongPollClient longPollClient = new LongPollClient(null, keyListeners); // Construct updated object. - BackendConfig.Builder backendConfig = BackendConfig.newBuilder(); - backendConfig.setNumReplicas(20); + DeploymentConfig.Builder deploymentConfig = DeploymentConfig.newBuilder(); + deploymentConfig.setNumReplicas(20); int snapshotId = 10; UpdatedObject.Builder updatedObject = UpdatedObject.newBuilder(); updatedObject.setSnapshotId(snapshotId); - updatedObject.setObjectSnapshot(ByteString.copyFrom(backendConfig.build().toByteArray())); + updatedObject.setObjectSnapshot(ByteString.copyFrom(deploymentConfig.build().toByteArray())); // Process update. Map updates = new HashMap<>(); @@ -40,8 +40,8 @@ public void test() throws Throwable { // Validation. Assert.assertEquals(longPollClient.getSnapshotIds().get(keyType).intValue(), snapshotId); Assert.assertEquals( - ((BackendConfig) longPollClient.getObjectSnapshots().get(keyType)).getNumReplicas(), - backendConfig.getNumReplicas()); - Assert.assertEquals(a[0], String.valueOf(backendConfig.getNumReplicas())); + ((DeploymentConfig) longPollClient.getObjectSnapshots().get(keyType)).getNumReplicas(), + deploymentConfig.getNumReplicas()); + Assert.assertEquals(a[0], String.valueOf(deploymentConfig.getNumReplicas())); } } diff --git a/python/ray/serve/api.py b/python/ray/serve/api.py index 06bacdf2b2d1..952e9aee57fa 100644 --- a/python/ray/serve/api.py +++ b/python/ray/serve/api.py @@ -17,7 +17,7 @@ from ray.actor import ActorHandle from ray.serve.common import BackendInfo, GoalId, ReplicaTag -from ray.serve.config import (AutoscalingConfig, BackendConfig, HTTPOptions, +from ray.serve.config import (AutoscalingConfig, DeploymentConfig, HTTPOptions, ReplicaConfig) from ray.serve.constants import (DEFAULT_CHECKPOINT_PATH, HTTP_PROXY_TIMEOUT, SERVE_CONTROLLER_NAME, MAX_CACHED_HANDLES, @@ -187,18 +187,19 @@ def _wait_for_goal(self, return False @_ensure_connected - def deploy(self, - name: str, - backend_def: Union[Callable, Type[Callable], str], - init_args: Tuple[Any], - init_kwargs: Dict[Any, Any], - ray_actor_options: Optional[Dict] = None, - config: Optional[Union[BackendConfig, Dict[str, Any]]] = None, - version: Optional[str] = None, - prev_version: Optional[str] = None, - route_prefix: Optional[str] = None, - url: str = "", - _blocking: Optional[bool] = True) -> Optional[GoalId]: + def deploy( + self, + name: str, + deployment_def: Union[Callable, Type[Callable], str], + init_args: Tuple[Any], + init_kwargs: Dict[Any, Any], + ray_actor_options: Optional[Dict] = None, + config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None, + version: Optional[str] = None, + prev_version: Optional[str] = None, + route_prefix: Optional[str] = None, + url: str = "", + _blocking: Optional[bool] = True) -> Optional[GoalId]: if config is None: config = {} if ray_actor_options is None: @@ -212,23 +213,25 @@ def deploy(self, ray_actor_options["runtime_env"] = curr_job_env replica_config = ReplicaConfig( - backend_def, + deployment_def, init_args=init_args, init_kwargs=init_kwargs, ray_actor_options=ray_actor_options) if isinstance(config, dict): - backend_config = BackendConfig.parse_obj(config) - elif isinstance(config, BackendConfig): - backend_config = config + deployment_config = DeploymentConfig.parse_obj(config) + elif isinstance(config, DeploymentConfig): + deployment_config = config else: - raise TypeError("config must be a BackendConfig or a dictionary.") + raise TypeError( + "config must be a DeploymentConfig or a dictionary.") goal_id, updating = ray.get( - self._controller.deploy.remote( - name, backend_config.to_proto_bytes(), replica_config, version, - prev_version, route_prefix, - ray.get_runtime_context().job_id)) + self._controller.deploy.remote(name, + deployment_config.to_proto_bytes(), + replica_config, version, + prev_version, route_prefix, + ray.get_runtime_context().job_id)) tag = f"component=serve deployment={name}" @@ -626,7 +629,7 @@ class Deployment: def __init__(self, func_or_class: Callable, name: str, - config: BackendConfig, + config: DeploymentConfig, version: Optional[str] = None, prev_version: Optional[str] = None, init_args: Optional[Tuple[Any]] = None, @@ -1021,7 +1024,7 @@ class MyDeployment: raise ValueError("Manually setting num_replicas is not allowed when " "_autoscaling_config is provided.") - config = BackendConfig() + config = DeploymentConfig() if num_replicas is not None: config.num_replicas = num_replicas @@ -1085,9 +1088,10 @@ def get_deployment(name: str) -> Deployment: raise KeyError(f"Deployment {name} was not found. " "Did you call Deployment.deploy()?") return Deployment( - cloudpickle.loads(backend_info.replica_config.serialized_backend_def), + cloudpickle.loads( + backend_info.replica_config.serialized_deployment_def), name, - backend_info.backend_config, + backend_info.deployment_config, version=backend_info.version, init_args=backend_info.replica_config.init_args, init_kwargs=backend_info.replica_config.init_kwargs, @@ -1109,9 +1113,9 @@ def list_deployments() -> Dict[str, Deployment]: for name, (backend_info, route_prefix) in infos.items(): deployments[name] = Deployment( cloudpickle.loads( - backend_info.replica_config.serialized_backend_def), + backend_info.replica_config.serialized_deployment_def), name, - backend_info.backend_config, + backend_info.deployment_config, version=backend_info.version, init_args=backend_info.replica_config.init_args, init_kwargs=backend_info.replica_config.init_kwargs, diff --git a/python/ray/serve/backend_state.py b/python/ray/serve/backend_state.py index 8f23bc3fba75..7b00235f34af 100644 --- a/python/ray/serve/backend_state.py +++ b/python/ray/serve/backend_state.py @@ -11,7 +11,7 @@ from ray.serve.async_goal_manager import AsyncGoalManager from ray.serve.common import (BackendInfo, BackendTag, Duration, GoalId, ReplicaTag, ReplicaName, RunningReplicaInfo) -from ray.serve.config import BackendConfig +from ray.serve.config import DeploymentConfig from ray.serve.constants import ( CONTROLLER_STARTUP_GRACE_PERIOD_S, SERVE_CONTROLLER_NAME, SERVE_PROXY_NAME, MAX_DEPLOYMENT_CONSTRUCTOR_RETRY_COUNT, MAX_NUM_DELETED_DEPLOYMENTS) @@ -156,9 +156,9 @@ def start(self, backend_info: BackendInfo, version: DeploymentVersion): """ self._actor_resources = backend_info.replica_config.resource_dict self._max_concurrent_queries = ( - backend_info.backend_config.max_concurrent_queries) + backend_info.deployment_config.max_concurrent_queries) self._graceful_shutdown_timeout_s = ( - backend_info.backend_config.graceful_shutdown_timeout_s) + backend_info.deployment_config.graceful_shutdown_timeout_s) if USE_PLACEMENT_GROUP: self._placement_group = self.create_placement_group( self._placement_group_name, self._actor_resources) @@ -177,11 +177,11 @@ def start(self, backend_info: BackendInfo, version: DeploymentVersion): self.backend_tag, self.replica_tag, backend_info.replica_config.init_args, backend_info.replica_config.init_kwargs, - backend_info.backend_config.to_proto_bytes(), version, + backend_info.deployment_config.to_proto_bytes(), version, self._controller_name, self._detached) self._ready_obj_ref = self._actor_handle.reconfigure.remote( - backend_info.backend_config.user_config) + backend_info.deployment_config.user_config) def update_user_config(self, user_config: Any): """ @@ -242,11 +242,11 @@ def check_ready( return ReplicaStartupStatus.PENDING, None elif len(ready) > 0: try: - backend_config, version = ray.get(ready)[0] + deployment_config, version = ray.get(ready)[0] self._max_concurrent_queries = ( - backend_config.max_concurrent_queries) + deployment_config.max_concurrent_queries) self._graceful_shutdown_timeout_s = ( - backend_config.graceful_shutdown_timeout_s) + deployment_config.graceful_shutdown_timeout_s) except Exception: return ReplicaStartupStatus.FAILED, None @@ -726,11 +726,11 @@ def _set_backend_goal(self, backend_info: Optional[BackendInfo]) -> None: if backend_info is not None: self._target_info = backend_info - self._target_replicas = backend_info.backend_config.num_replicas + self._target_replicas = backend_info.deployment_config.num_replicas self._target_version = DeploymentVersion( backend_info.version, - user_config=backend_info.backend_config.user_config) + user_config=backend_info.deployment_config.user_config) else: self._target_replicas = 0 @@ -746,7 +746,7 @@ def deploy(self, backend_info: BackendInfo) -> Tuple[Optional[GoalId], bool]: """Deploy the backend. - If the backend already exists with the same version and BackendConfig, + If the backend already exists with the same version and config, this is a no-op and returns the GoalId corresponding to the existing update if there is one. @@ -760,7 +760,8 @@ def deploy(self, # Redeploying should not reset the deployment's start time. backend_info.start_time_ms = existing_info.start_time_ms - if (existing_info.backend_config == backend_info.backend_config + if (existing_info.deployment_config == + backend_info.deployment_config and backend_info.version is not None and existing_info.version == backend_info.version): return self._curr_goal, False @@ -1291,19 +1292,20 @@ def get_running_replica_infos( return replicas - def get_backend_configs(self, - filter_tag: Optional[BackendTag] = None, - include_deleted: Optional[bool] = False - ) -> Dict[BackendTag, BackendConfig]: - configs: Dict[BackendTag, BackendConfig] = {} + def get_deployment_configs(self, + filter_tag: Optional[BackendTag] = None, + include_deleted: Optional[bool] = False + ) -> Dict[BackendTag, DeploymentConfig]: + configs: Dict[BackendTag, DeploymentConfig] = {} for backend_tag, backend_state in self._backend_states.items(): if filter_tag is None or backend_tag == filter_tag: - configs[backend_tag] = backend_state.target_info.backend_config + configs[ + backend_tag] = backend_state.target_info.deployment_config if include_deleted: for backend_tag, info in self._deleted_backend_metadata.items(): if filter_tag is None or backend_tag == filter_tag: - configs[backend_tag] = info.backend_config + configs[backend_tag] = info.deployment_config return configs @@ -1322,7 +1324,7 @@ def deploy_backend(self, backend_tag: BackendTag, backend_info: BackendInfo ) -> Tuple[Optional[GoalId], bool]: """Deploy the backend. - If the backend already exists with the same version and BackendConfig, + If the backend already exists with the same version and config, this is a no-op and returns the GoalId corresponding to the existing update if there is one. diff --git a/python/ray/serve/common.py b/python/ray/serve/common.py index 3c236a668d26..eb0a5dead6ea 100644 --- a/python/ray/serve/common.py +++ b/python/ray/serve/common.py @@ -5,7 +5,7 @@ from uuid import UUID from ray.actor import ActorClass, ActorHandle -from ray.serve.config import BackendConfig, ReplicaConfig +from ray.serve.config import DeploymentConfig, ReplicaConfig from ray.serve.autoscaling_policy import AutoscalingPolicy BackendTag = str @@ -23,7 +23,7 @@ class EndpointInfo: class BackendInfo: def __init__(self, - backend_config: BackendConfig, + deployment_config: DeploymentConfig, replica_config: ReplicaConfig, start_time_ms: int, actor_def: Optional[ActorClass] = None, @@ -31,7 +31,7 @@ def __init__(self, deployer_job_id: "Optional[ray._raylet.JobID]" = None, end_time_ms: Optional[int] = None, autoscaling_policy: Optional[AutoscalingPolicy] = None): - self.backend_config = backend_config + self.deployment_config = deployment_config self.replica_config = replica_config # The time when .deploy() was first called for this deployment. self.start_time_ms = start_time_ms diff --git a/python/ray/serve/config.py b/python/ray/serve/config.py index 7f010d3ff37e..60a1e1344dd8 100644 --- a/python/ray/serve/config.py +++ b/python/ray/serve/config.py @@ -7,10 +7,10 @@ from google.protobuf.json_format import MessageToDict from pydantic import BaseModel, NonNegativeFloat, PositiveInt, validator from ray.serve.constants import DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT -from ray.serve.generated.serve_pb2 import (BackendConfig as BackendConfigProto, - AutoscalingConfig as - AutoscalingConfigProto) -from ray.serve.generated.serve_pb2 import BackendLanguage +from ray.serve.generated.serve_pb2 import ( + DeploymentConfig as DeploymentConfigProto, AutoscalingConfig as + AutoscalingConfigProto) +from ray.serve.generated.serve_pb2 import DeploymentLanguage from ray import cloudpickle as cloudpickle @@ -56,21 +56,21 @@ class AutoscalingConfig(BaseModel): # TODO(architkulkarni): Add pydantic validation. E.g. max_replicas>=min -class BackendConfig(BaseModel): - """Configuration options for a backend, to be set by the user. +class DeploymentConfig(BaseModel): + """Configuration options for a deployment, to be set by the user. Args: num_replicas (Optional[int]): The number of processes to start up that - will handle requests to this backend. Defaults to 1. + will handle requests to this deployment. Defaults to 1. max_concurrent_queries (Optional[int]): The maximum number of queries - that will be sent to a replica of this backend without receiving a - response. Defaults to 100. + that will be sent to a replica of this deployment without receiving + a response. Defaults to 100. user_config (Optional[Any]): Arguments to pass to the reconfigure - method of the backend. The reconfigure method is called if + method of the deployment. The reconfigure method is called if user_config is not None. graceful_shutdown_wait_loop_s (Optional[float]): Duration - that backend workers will wait until there is no more work to be - done before shutting down. Defaults to 2s. + that deployment replicas will wait until there is no more work to + be done before shutting down. Defaults to 2s. graceful_shutdown_timeout_s (Optional[float]): Controller waits for this duration to forcefully kill the replica for shutdown. Defaults to 20s. @@ -107,15 +107,15 @@ def to_proto_bytes(self): if data.get("autoscaling_config"): data["autoscaling_config"] = AutoscalingConfigProto( **data["autoscaling_config"]) - return BackendConfigProto( + return DeploymentConfigProto( is_cross_language=False, - backend_language=BackendLanguage.PYTHON, + deployment_language=DeploymentLanguage.PYTHON, **data, ).SerializeToString() @classmethod def from_proto_bytes(cls, proto_bytes: bytes): - proto = BackendConfigProto.FromString(proto_bytes) + proto = DeploymentConfigProto.FromString(proto_bytes) data = MessageToDict( proto, including_default_value_fields=True, @@ -131,37 +131,36 @@ def from_proto_bytes(cls, proto_bytes: bytes): # Delete fields which are only used in protobuf, not in Python. del data["is_cross_language"] - del data["backend_language"] + del data["deployment_language"] return cls(**data) class ReplicaConfig: def __init__(self, - backend_def: Callable, + deployment_def: Callable, init_args: Optional[Tuple[Any]] = None, init_kwargs: Optional[Dict[Any, Any]] = None, ray_actor_options=None): - # Validate that backend_def is an import path, function, or class. - if isinstance(backend_def, str): - self.func_or_class_name = backend_def - pass - elif inspect.isfunction(backend_def): - self.func_or_class_name = backend_def.__name__ + # Validate that deployment_def is an import path, function, or class. + if isinstance(deployment_def, str): + self.func_or_class_name = deployment_def + elif inspect.isfunction(deployment_def): + self.func_or_class_name = deployment_def.__name__ if init_args: raise ValueError( - "init_args not supported for function backend.") + "init_args not supported for function deployments.") if init_kwargs: raise ValueError( - "init_kwargs not supported for function backend.") - elif inspect.isclass(backend_def): - self.func_or_class_name = backend_def.__name__ + "init_kwargs not supported for function deployments.") + elif inspect.isclass(deployment_def): + self.func_or_class_name = deployment_def.__name__ else: raise TypeError( - "Backend must be an import path, function or class, it is {}.". - format(type(backend_def))) + "Deployment must be a function or class, it is {}.".format( + type(deployment_def))) - self.serialized_backend_def = cloudpickle.dumps(backend_def) + self.serialized_deployment_def = cloudpickle.dumps(deployment_def) self.init_args = init_args if init_args is not None else () self.init_kwargs = init_kwargs if init_kwargs is not None else {} if ray_actor_options is None: @@ -175,7 +174,7 @@ def __init__(self, def _validate(self): if "placement_group" in self.ray_actor_options: - raise ValueError("Providing placement_group for backend actors " + raise ValueError("Providing placement_group for deployment actors " "is not currently supported.") if not isinstance(self.ray_actor_options, dict): diff --git a/python/ray/serve/controller.py b/python/ray/serve/controller.py index e1dd906a398a..cbebddafa0c4 100644 --- a/python/ray/serve/controller.py +++ b/python/ray/serve/controller.py @@ -20,7 +20,7 @@ NodeId, RunningReplicaInfo, ) -from ray.serve.config import BackendConfig, HTTPOptions, ReplicaConfig +from ray.serve.config import DeploymentConfig, HTTPOptions, ReplicaConfig from ray.serve.constants import CONTROL_LOOP_PERIOD_S, SERVE_ROOT_URL_ENV_KEY from ray.serve.endpoint_state import EndpointState from ray.serve.http_state import HTTPState @@ -142,7 +142,7 @@ def autoscale(self) -> None: """Updates autoscaling deployments with calculated num_replicas.""" for deployment_name, (backend_info, route_prefix) in self.list_deployments().items(): - backend_config = backend_info.backend_config + deployment_config = backend_info.deployment_config autoscaling_policy = backend_info.autoscaling_policy if autoscaling_policy is None: @@ -166,16 +166,16 @@ def autoscale(self) -> None: if len(current_num_ongoing_requests) == 0: continue - new_backend_config = backend_config.copy() + new_deployment_config = deployment_config.copy() decision_num_replicas = ( autoscaling_policy.get_decision_num_replicas( current_num_ongoing_requests=current_num_ongoing_requests, - curr_target_num_replicas=backend_config.num_replicas)) - new_backend_config.num_replicas = decision_num_replicas + curr_target_num_replicas=deployment_config.num_replicas)) + new_deployment_config.num_replicas = decision_num_replicas new_backend_info = copy(backend_info) - new_backend_info.backend_config = new_backend_config + new_backend_info.deployment_config = new_deployment_config goal_id, updating = self.backend_state_manager.deploy_backend( deployment_name, new_backend_info) @@ -275,7 +275,7 @@ async def shutdown(self) -> List[GoalId]: def deploy(self, name: str, - backend_config_proto_bytes: bytes, + deployment_config_proto_bytes: bytes, replica_config: ReplicaConfig, version: Optional[str], prev_version: Optional[str], @@ -285,8 +285,8 @@ def deploy(self, if route_prefix is not None: assert route_prefix.startswith("/") - backend_config = BackendConfig.from_proto_bytes( - backend_config_proto_bytes) + deployment_config = DeploymentConfig.from_proto_bytes( + deployment_config_proto_bytes) if prev_version is not None: existing_backend_info = self.backend_state_manager.get_backend( @@ -301,10 +301,10 @@ def deploy(self, "does not match with the existing " f"version '{existing_backend_info.version}'.") - autoscaling_config = backend_config.autoscaling_config + autoscaling_config = deployment_config.autoscaling_config if autoscaling_config is not None: # TODO: is this the desired behaviour? Should this be a setting? - backend_config.num_replicas = autoscaling_config.min_replicas + deployment_config.num_replicas = autoscaling_config.min_replicas autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config) else: @@ -312,10 +312,10 @@ def deploy(self, backend_info = BackendInfo( actor_def=ray.remote( - create_replica_wrapper(name, - replica_config.serialized_backend_def)), + create_replica_wrapper( + name, replica_config.serialized_deployment_def)), version=version, - backend_config=backend_config, + deployment_config=deployment_config, replica_config=replica_config, deployer_job_id=deployer_job_id, start_time_ms=int(time.time() * 1000), @@ -373,6 +373,6 @@ def list_deployments(self, include_deleted: Optional[bool] = False name: (self.backend_state_manager.get_backend( name, include_deleted=include_deleted), self.endpoint_state.get_endpoint_route(name)) - for name in self.backend_state_manager.get_backend_configs( + for name in self.backend_state_manager.get_deployment_configs( include_deleted=include_deleted) } diff --git a/python/ray/serve/replica.py b/python/ray/serve/replica.py index a85ed390b71c..45db316106a7 100644 --- a/python/ray/serve/replica.py +++ b/python/ray/serve/replica.py @@ -15,7 +15,7 @@ from ray.serve.autoscaling_metrics import start_metrics_pusher from ray.serve.common import BackendTag, ReplicaTag -from ray.serve.config import BackendConfig +from ray.serve.config import DeploymentConfig from ray.serve.http_util import ASGIHTTPSender from ray.serve.utils import parse_request_item, _get_logger from ray.serve.exceptions import RayServeException @@ -31,30 +31,30 @@ logger = _get_logger() -def create_replica_wrapper(name: str, serialized_backend_def: bytes): +def create_replica_wrapper(name: str, serialized_deployment_def: bytes): """Creates a replica class wrapping the provided function or class. This approach is picked over inheritance to avoid conflict between user provided class and the RayServeReplica class. """ - serialized_backend_def = serialized_backend_def + serialized_deployment_def = serialized_deployment_def # TODO(architkulkarni): Add type hints after upgrading cloudpickle class RayServeWrappedReplica(object): async def __init__(self, backend_tag, replica_tag, init_args, - init_kwargs, backend_config_proto_bytes: bytes, + init_kwargs, deployment_config_proto_bytes: bytes, version: DeploymentVersion, controller_name: str, detached: bool): - backend = cloudpickle.loads(serialized_backend_def) - backend_config = BackendConfig.from_proto_bytes( - backend_config_proto_bytes) + backend = cloudpickle.loads(serialized_deployment_def) + deployment_config = DeploymentConfig.from_proto_bytes( + deployment_config_proto_bytes) if inspect.isfunction(backend): is_function = True elif inspect.isclass(backend): is_function = False else: - assert False, ("backend_def must be function, class, or " + assert False, ("deployment_def must be function, class, or " "corresponding import path.") # Set the controller name so that serve.connect() in the user's @@ -85,10 +85,10 @@ async def __init__(self, backend_tag, replica_tag, init_args, detached) controller_handle = ray.get_actor( controller_name, namespace=controller_namespace) - self.backend = RayServeReplica(_callable, backend_tag, replica_tag, - backend_config, - backend_config.user_config, version, - is_function, controller_handle) + self.backend = RayServeReplica( + _callable, backend_tag, replica_tag, deployment_config, + deployment_config.user_config, version, is_function, + controller_handle) # asyncio.Event used to signal that the replica is shutting down. self.shutdown_event = asyncio.Event() @@ -109,14 +109,14 @@ async def handle_request( return await self.backend.handle_request(query) async def reconfigure(self, user_config: Optional[Any] = None - ) -> Tuple[BackendConfig, DeploymentVersion]: + ) -> Tuple[DeploymentConfig, DeploymentVersion]: if user_config is not None: await self.backend.reconfigure(user_config) return self.get_metadata() - def get_metadata(self) -> Tuple[BackendConfig, DeploymentVersion]: - return self.backend.backend_config, self.backend.version + def get_metadata(self) -> Tuple[DeploymentConfig, DeploymentVersion]: + return self.backend.deployment_config, self.backend.version async def prepare_for_shutdown(self): self.shutdown_event.set() @@ -146,10 +146,10 @@ class RayServeReplica: """Handles requests with the provided callable.""" def __init__(self, _callable: Callable, backend_tag: BackendTag, - replica_tag: ReplicaTag, backend_config: BackendConfig, + replica_tag: ReplicaTag, deployment_config: DeploymentConfig, user_config: Any, version: DeploymentVersion, is_function: bool, controller_handle: ActorHandle) -> None: - self.backend_config = backend_config + self.deployment_config = deployment_config self.backend_tag = backend_tag self.replica_tag = replica_tag self.callable = _callable @@ -211,10 +211,10 @@ def __init__(self, _callable: Callable, backend_tag: BackendTag, self.restart_counter.inc() self._shutdown_wait_loop_s = ( - backend_config.graceful_shutdown_wait_loop_s) + deployment_config.graceful_shutdown_wait_loop_s) - if backend_config.autoscaling_config: - config = backend_config.autoscaling_config + if deployment_config.autoscaling_config: + config = deployment_config.autoscaling_config start_metrics_pusher( interval_s=config.metrics_interval_s, collection_callback=self._collect_autoscaling_metrics, @@ -319,7 +319,8 @@ async def reconfigure(self, user_config: Any): self.version = DeploymentVersion( self.version.code_version, user_config=user_config) if self.is_function: - raise ValueError("backend_def must be a class to use user_config") + raise ValueError( + "deployment_def must be a class to use user_config") elif not hasattr(self.callable, BACKEND_RECONFIGURE_METHOD): raise RayServeException("user_config specified but backend " + self.backend_tag + " missing " + diff --git a/python/ray/serve/tests/test_backend_state.py b/python/ray/serve/tests/test_backend_state.py index 74a170a6f1c6..687382945249 100644 --- a/python/ray/serve/tests/test_backend_state.py +++ b/python/ray/serve/tests/test_backend_state.py @@ -8,7 +8,7 @@ from ray.actor import ActorHandle from ray.serve.common import ( - BackendConfig, + DeploymentConfig, BackendInfo, BackendTag, ReplicaConfig, @@ -130,7 +130,7 @@ def available_resources(self) -> Dict[str, float]: def graceful_stop(self) -> None: assert self.started self.stopped = True - return self.backend_info.backend_config.graceful_shutdown_timeout_s + return self.backend_info.deployment_config.graceful_shutdown_timeout_s def check_stopped(self) -> bool: return self.done_stopping @@ -154,7 +154,7 @@ def backend_info(version: Optional[str] = None, actor_def=None, version=version, start_time_ms=0, - backend_config=BackendConfig( + deployment_config=DeploymentConfig( num_replicas=num_replicas, user_config=user_config, **config_opts), replica_config=ReplicaConfig(lambda x: x)) @@ -163,7 +163,8 @@ def backend_info(version: Optional[str] = None, else: code_version = get_random_letters() - version = DeploymentVersion(code_version, info.backend_config.user_config) + version = DeploymentVersion(code_version, + info.deployment_config.user_config) return info, version diff --git a/python/ray/serve/tests/test_config.py b/python/ray/serve/tests/test_config.py index 505147055e34..f8e96487434d 100644 --- a/python/ray/serve/tests/test_config.py +++ b/python/ray/serve/tests/test_config.py @@ -1,29 +1,29 @@ import pytest from pydantic import ValidationError -from ray.serve.config import (BackendConfig, DeploymentMode, HTTPOptions, +from ray.serve.config import (DeploymentConfig, DeploymentMode, HTTPOptions, ReplicaConfig) from ray.serve.config import AutoscalingConfig -def test_backend_config_validation(): +def test_deployment_config_validation(): # Test unknown key. with pytest.raises(ValidationError): - BackendConfig(unknown_key=-1) + DeploymentConfig(unknown_key=-1) # Test num_replicas validation. - BackendConfig(num_replicas=1) + DeploymentConfig(num_replicas=1) with pytest.raises(ValidationError, match="type_error"): - BackendConfig(num_replicas="hello") + DeploymentConfig(num_replicas="hello") with pytest.raises(ValidationError, match="value_error"): - BackendConfig(num_replicas=-1) + DeploymentConfig(num_replicas=-1) # Test dynamic default for max_concurrent_queries. - assert BackendConfig().max_concurrent_queries == 100 + assert DeploymentConfig().max_concurrent_queries == 100 -def test_backend_config_update(): - b = BackendConfig(num_replicas=1, max_concurrent_queries=1) +def test_deployment_config_update(): + b = DeploymentConfig(num_replicas=1, max_concurrent_queries=1) # Test updating a key works. b.num_replicas = 2 @@ -108,18 +108,18 @@ def test_http_options(): def test_with_proto(): # Test roundtrip - config = BackendConfig(num_replicas=100, max_concurrent_queries=16) - assert config == BackendConfig.from_proto_bytes(config.to_proto_bytes()) + config = DeploymentConfig(num_replicas=100, max_concurrent_queries=16) + assert config == DeploymentConfig.from_proto_bytes(config.to_proto_bytes()) # Test user_config object - config = BackendConfig(user_config={"python": ("native", ["objects"])}) - assert config == BackendConfig.from_proto_bytes(config.to_proto_bytes()) + config = DeploymentConfig(user_config={"python": ("native", ["objects"])}) + assert config == DeploymentConfig.from_proto_bytes(config.to_proto_bytes()) def test_zero_default_proto(): # Test that options set to zero (protobuf default value) still retain their # original value after being serialized and deserialized. - config = BackendConfig( + config = DeploymentConfig( autoscaling_config={ "min_replicas": 1, "max_replicas": 2, @@ -127,7 +127,7 @@ def test_zero_default_proto(): "downscale_delay_s": 0 }) serialized_config = config.to_proto_bytes() - deserialized_config = BackendConfig.from_proto_bytes(serialized_config) + deserialized_config = DeploymentConfig.from_proto_bytes(serialized_config) new_delay_s = deserialized_config.autoscaling_config.downscale_delay_s assert new_delay_s == 0 diff --git a/src/ray/protobuf/serve.proto b/src/ray/protobuf/serve.proto index 2cadc2128dfb..5bfa6775136d 100644 --- a/src/ray/protobuf/serve.proto +++ b/src/ray/protobuf/serve.proto @@ -51,21 +51,21 @@ message AutoscalingConfig { double upscale_delay_s = 8; } -// Configuration options for a backend, to be set by the user. -message BackendConfig { - // The number of processes to start up that will handle requests to this backend. +// Configuration options for a deployment, to be set by the user. +message DeploymentConfig { + // The number of processes to start up that will handle requests to this deployment. // Defaults to 1. int32 num_replicas = 1; - // The maximum number of queries that will be sent to a replica of this backend without + // The maximum number of queries that will be sent to a replica of this deployment without // receiving a response. Defaults to 100. int32 max_concurrent_queries = 2; - // Arguments to pass to the reconfigure method of the backend. The reconfigure method is + // Arguments to pass to the reconfigure method of the deployment. The reconfigure method is // called if user_config is not None. bytes user_config = 3; - // Duration that backend workers will wait until there is no more work to be done before + // Duration that deployment replicas will wait until there is no more work to be done before // shutting down. Defaults to 2s. double graceful_shutdown_wait_loop_s = 4; @@ -73,18 +73,18 @@ message BackendConfig { // Defaults to 20s. double graceful_shutdown_timeout_s = 5; - // Is the construction of backend is cross language? + // Is the construction of deployment is cross language? bool is_cross_language = 6; - // The backend's programming language. - BackendLanguage backend_language = 7; + // The deployment's programming language. + DeploymentLanguage deployment_language = 7; - // The backend's autoscaling configuration. + // The deployment's autoscaling configuration. AutoscalingConfig autoscaling_config = 8; } // Backend language. -enum BackendLanguage { +enum DeploymentLanguage { PYTHON = 0; JAVA = 1; } From 241573d9994867453a773a09617523dbc40f3ff7 Mon Sep 17 00:00:00 2001 From: Ed Oakes Date: Sun, 31 Oct 2021 16:14:46 -0500 Subject: [PATCH 2/2] fix lint --- src/ray/protobuf/serve.proto | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ray/protobuf/serve.proto b/src/ray/protobuf/serve.proto index 5bfa6775136d..20f451f1129d 100644 --- a/src/ray/protobuf/serve.proto +++ b/src/ray/protobuf/serve.proto @@ -57,16 +57,16 @@ message DeploymentConfig { // Defaults to 1. int32 num_replicas = 1; - // The maximum number of queries that will be sent to a replica of this deployment without - // receiving a response. Defaults to 100. + // The maximum number of queries that will be sent to a replica of this deployment + // without receiving a response. Defaults to 100. int32 max_concurrent_queries = 2; - // Arguments to pass to the reconfigure method of the deployment. The reconfigure method is - // called if user_config is not None. + // Arguments to pass to the reconfigure method of the deployment. The reconfigure method + // is called if user_config is not None. bytes user_config = 3; - // Duration that deployment replicas will wait until there is no more work to be done before - // shutting down. Defaults to 2s. + // Duration that deployment replicas will wait until there is no more work to be done + // before shutting down. Defaults to 2s. double graceful_shutdown_wait_loop_s = 4; // Controller waits for this duration to forcefully kill the replica for shutdown.