From 9e70bff92334641998f8b8235409a3bcd91e3fed Mon Sep 17 00:00:00 2001
From: Ed Oakes <ed.nmi.oakes@gmail.com>
Date: Sun, 31 Oct 2021 10:38:41 -0500
Subject: [PATCH 1/2] BackendConfig -> DeploymentConfig

---
 .../java/io/ray/serve/DeploymentInfo.java     | 10 +--
 .../java/io/ray/serve/RayServeReplica.java    | 14 ++---
 .../io/ray/serve/RayServeWrappedReplica.java  | 18 +++---
 .../main/java/io/ray/serve/ReplicaSet.java    |  6 +-
 .../src/main/java/io/ray/serve/Router.java    |  2 +-
 .../io/ray/serve/poll/LongPollClient.java     |  2 +-
 .../io/ray/serve/util/ServeProtoUtil.java     | 39 ++++++------
 .../java/io/ray/serve/ProxyActorTest.java     |  4 +-
 .../java/io/ray/serve/RayServeHandleTest.java | 12 ++--
 .../io/ray/serve/RayServeReplicaTest.java     | 12 ++--
 .../java/io/ray/serve/ReplicaSetTest.java     | 14 ++---
 .../test/java/io/ray/serve/RouterTest.java    | 12 ++--
 .../io/ray/serve/poll/LongPollClientTest.java | 16 ++---
 python/ray/serve/api.py                       | 60 +++++++++---------
 python/ray/serve/backend_state.py             | 42 +++++++------
 python/ray/serve/common.py                    |  6 +-
 python/ray/serve/config.py                    | 61 +++++++++----------
 python/ray/serve/controller.py                | 30 ++++-----
 python/ray/serve/replica.py                   | 43 ++++++-------
 python/ray/serve/tests/test_backend_state.py  |  9 +--
 python/ray/serve/tests/test_config.py         | 30 ++++-----
 src/ray/protobuf/serve.proto                  | 22 +++----
 22 files changed, 236 insertions(+), 228 deletions(-)

diff --git a/java/serve/src/main/java/io/ray/serve/DeploymentInfo.java b/java/serve/src/main/java/io/ray/serve/DeploymentInfo.java
index 0a6117b21133..943be34514c8 100644
--- a/java/serve/src/main/java/io/ray/serve/DeploymentInfo.java
+++ b/java/serve/src/main/java/io/ray/serve/DeploymentInfo.java
@@ -6,18 +6,18 @@ public class DeploymentInfo implements Serializable {
 
   private static final long serialVersionUID = -4198364411759931955L;
 
-  private byte[] backendConfig;
+  private byte[] deploymentConfig;
 
   private ReplicaConfig replicaConfig;
 
   private byte[] deploymentVersion;
 
-  public byte[] getBackendConfig() {
-    return backendConfig;
+  public byte[] getDeploymentConfig() {
+    return deploymentConfig;
   }
 
-  public void setBackendConfig(byte[] backendConfig) {
-    this.backendConfig = backendConfig;
+  public void setDeploymentConfig(byte[] deploymentConfig) {
+    this.deploymentConfig = deploymentConfig;
   }
 
   public ReplicaConfig getReplicaConfig() {
diff --git a/java/serve/src/main/java/io/ray/serve/RayServeReplica.java b/java/serve/src/main/java/io/ray/serve/RayServeReplica.java
index c8445ca94238..7151f175fd65 100644
--- a/java/serve/src/main/java/io/ray/serve/RayServeReplica.java
+++ b/java/serve/src/main/java/io/ray/serve/RayServeReplica.java
@@ -9,7 +9,7 @@
 import io.ray.runtime.metric.Metrics;
 import io.ray.runtime.serializer.MessagePackSerializer;
 import io.ray.serve.api.Serve;
-import io.ray.serve.generated.BackendConfig;
+import io.ray.serve.generated.DeploymentConfig;
 import io.ray.serve.generated.DeploymentVersion;
 import io.ray.serve.generated.RequestWrapper;
 import io.ray.serve.poll.KeyListener;
@@ -34,7 +34,7 @@ public class RayServeReplica {
 
   private String replicaTag;
 
-  private BackendConfig config;
+  private DeploymentConfig config;
 
   private AtomicInteger numOngoingRequests = new AtomicInteger();
 
@@ -58,19 +58,19 @@ public class RayServeReplica {
 
   public RayServeReplica(
       Object callable,
-      BackendConfig backendConfig,
+      DeploymentConfig deploymentConfig,
       DeploymentVersion version,
       BaseActorHandle actorHandle) {
     this.backendTag = Serve.getReplicaContext().getBackendTag();
     this.replicaTag = Serve.getReplicaContext().getReplicaTag();
     this.callable = callable;
-    this.config = backendConfig;
+    this.config = deploymentConfig;
     this.version = version;
 
     Map<KeyType, KeyListener> keyListeners = new HashMap<>();
     keyListeners.put(
         new KeyType(LongPollNamespace.BACKEND_CONFIGS, backendTag),
-        newConfig -> updateBackendConfigs(newConfig));
+        newConfig -> updateDeploymentConfigs(newConfig));
     this.longPollClient = new LongPollClient(actorHandle, keyListeners);
     this.longPollClient.start();
     registerMetrics();
@@ -319,8 +319,8 @@ public DeploymentVersion reconfigure(Object userConfig) {
    *
    * @param newConfig the new configuration of backend
    */
-  private void updateBackendConfigs(Object newConfig) {
-    config = (BackendConfig) newConfig;
+  private void updateDeploymentConfigs(Object newConfig) {
+    config = (DeploymentConfig) newConfig;
   }
 
   public DeploymentVersion getVersion() {
diff --git a/java/serve/src/main/java/io/ray/serve/RayServeWrappedReplica.java b/java/serve/src/main/java/io/ray/serve/RayServeWrappedReplica.java
index e3228c9c0aaf..545feb81da94 100644
--- a/java/serve/src/main/java/io/ray/serve/RayServeWrappedReplica.java
+++ b/java/serve/src/main/java/io/ray/serve/RayServeWrappedReplica.java
@@ -6,7 +6,7 @@
 import io.ray.api.Ray;
 import io.ray.runtime.serializer.MessagePackSerializer;
 import io.ray.serve.api.Serve;
-import io.ray.serve.generated.BackendConfig;
+import io.ray.serve.generated.DeploymentConfig;
 import io.ray.serve.generated.DeploymentVersion;
 import io.ray.serve.generated.RequestMetadata;
 import io.ray.serve.util.ReflectUtil;
@@ -27,17 +27,17 @@ public RayServeWrappedReplica(
       String replicaTag,
       String backendDef,
       byte[] initArgsbytes,
-      byte[] backendConfigBytes,
+      byte[] deploymentConfigBytes,
       byte[] deploymentVersionBytes,
       String controllerName)
       throws ClassNotFoundException, NoSuchMethodException, InstantiationException,
           IllegalAccessException, IllegalArgumentException, InvocationTargetException, IOException {
 
-    // Parse BackendConfig.
-    BackendConfig backendConfig = ServeProtoUtil.parseBackendConfig(backendConfigBytes);
+    // Parse DeploymentConfig.
+    DeploymentConfig deploymentConfig = ServeProtoUtil.parseDeploymentConfig(deploymentConfigBytes);
 
     // Parse init args.
-    Object[] initArgs = parseInitArgs(initArgsbytes, backendConfig);
+    Object[] initArgs = parseInitArgs(initArgsbytes, deploymentConfig);
 
     // Instantiate the object defined by backendDef.
     Class backendClass = Class.forName(backendDef);
@@ -57,7 +57,7 @@ public RayServeWrappedReplica(
     backend =
         new RayServeReplica(
             callable,
-            backendConfig,
+            deploymentConfig,
             ServeProtoUtil.parseDeploymentVersion(deploymentVersionBytes),
             optional.get());
   }
@@ -71,19 +71,19 @@ public RayServeWrappedReplica(
         replicaTag,
         deploymentInfo.getReplicaConfig().getBackendDef(),
         deploymentInfo.getReplicaConfig().getInitArgs(),
-        deploymentInfo.getBackendConfig(),
+        deploymentInfo.getDeploymentConfig(),
         deploymentInfo.getDeploymentVersion(),
         controllerName);
   }
 
-  private Object[] parseInitArgs(byte[] initArgsbytes, BackendConfig backendConfig)
+  private Object[] parseInitArgs(byte[] initArgsbytes, DeploymentConfig deploymentConfig)
       throws IOException {
 
     if (initArgsbytes == null || initArgsbytes.length == 0) {
       return new Object[0];
     }
 
-    if (!backendConfig.getIsCrossLanguage()) {
+    if (!deploymentConfig.getIsCrossLanguage()) {
       // If the construction request is from Java API, deserialize initArgsbytes to Object[]
       // directly.
       return MessagePackSerializer.decode(initArgsbytes, Object[].class);
diff --git a/java/serve/src/main/java/io/ray/serve/ReplicaSet.java b/java/serve/src/main/java/io/ray/serve/ReplicaSet.java
index 1c7e757bba44..0b5646f4bd5c 100644
--- a/java/serve/src/main/java/io/ray/serve/ReplicaSet.java
+++ b/java/serve/src/main/java/io/ray/serve/ReplicaSet.java
@@ -9,7 +9,7 @@
 import io.ray.runtime.metric.Metrics;
 import io.ray.runtime.metric.TagKey;
 import io.ray.serve.generated.ActorSet;
-import io.ray.serve.generated.BackendConfig;
+import io.ray.serve.generated.DeploymentConfig;
 import io.ray.serve.util.CollectionUtil;
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -48,8 +48,8 @@ public ReplicaSet(String backendTag) {
                     .register());
   }
 
-  public void setMaxConcurrentQueries(Object backendConfig) {
-    int newValue = ((BackendConfig) backendConfig).getMaxConcurrentQueries();
+  public void setMaxConcurrentQueries(Object deploymentConfig) {
+    int newValue = ((DeploymentConfig) deploymentConfig).getMaxConcurrentQueries();
     if (newValue != this.maxConcurrentQueries) {
       this.maxConcurrentQueries = newValue;
       LOGGER.info("ReplicaSet: changing max_concurrent_queries to {}", newValue);
diff --git a/java/serve/src/main/java/io/ray/serve/Router.java b/java/serve/src/main/java/io/ray/serve/Router.java
index 5ef339d77767..0b744c835a09 100644
--- a/java/serve/src/main/java/io/ray/serve/Router.java
+++ b/java/serve/src/main/java/io/ray/serve/Router.java
@@ -38,7 +38,7 @@ public Router(BaseActorHandle controllerHandle, String backendTag) {
     Map<KeyType, KeyListener> keyListeners = new HashMap<>();
     keyListeners.put(
         new KeyType(LongPollNamespace.BACKEND_CONFIGS, backendTag),
-        backendConfig -> replicaSet.setMaxConcurrentQueries(backendConfig)); // cross language
+        deploymentConfig -> replicaSet.setMaxConcurrentQueries(deploymentConfig)); // cross language
     keyListeners.put(
         new KeyType(LongPollNamespace.REPLICA_HANDLES, backendTag),
         workerReplicas -> replicaSet.updateWorkerReplicas(workerReplicas)); // cross language
diff --git a/java/serve/src/main/java/io/ray/serve/poll/LongPollClient.java b/java/serve/src/main/java/io/ray/serve/poll/LongPollClient.java
index 308391254e10..54d107b717aa 100644
--- a/java/serve/src/main/java/io/ray/serve/poll/LongPollClient.java
+++ b/java/serve/src/main/java/io/ray/serve/poll/LongPollClient.java
@@ -47,7 +47,7 @@ public class LongPollClient {
 
   static {
     DESERIALIZERS.put(
-        LongPollNamespace.BACKEND_CONFIGS, body -> ServeProtoUtil.parseBackendConfig(body));
+        LongPollNamespace.BACKEND_CONFIGS, body -> ServeProtoUtil.parseDeploymentConfig(body));
     DESERIALIZERS.put(
         LongPollNamespace.REPLICA_HANDLES, body -> ServeProtoUtil.parseEndpointSet(body));
     DESERIALIZERS.put(
diff --git a/java/serve/src/main/java/io/ray/serve/util/ServeProtoUtil.java b/java/serve/src/main/java/io/ray/serve/util/ServeProtoUtil.java
index eeee061a9fb8..edcc8399e101 100644
--- a/java/serve/src/main/java/io/ray/serve/util/ServeProtoUtil.java
+++ b/java/serve/src/main/java/io/ray/serve/util/ServeProtoUtil.java
@@ -7,8 +7,8 @@
 import io.ray.runtime.serializer.MessagePackSerializer;
 import io.ray.serve.Constants;
 import io.ray.serve.RayServeException;
-import io.ray.serve.generated.BackendConfig;
-import io.ray.serve.generated.BackendLanguage;
+import io.ray.serve.generated.DeploymentConfig;
+import io.ray.serve.generated.DeploymentLanguage;
 import io.ray.serve.generated.DeploymentVersion;
 import io.ray.serve.generated.EndpointInfo;
 import io.ray.serve.generated.EndpointSet;
@@ -25,23 +25,23 @@ public class ServeProtoUtil {
 
   private static final Gson GSON = new Gson();
 
-  public static BackendConfig parseBackendConfig(byte[] backendConfigBytes) {
+  public static DeploymentConfig parseDeploymentConfig(byte[] deploymentConfigBytes) {
 
-    // Get a builder from BackendConfig(bytes) or create a new one.
-    BackendConfig.Builder builder = null;
-    if (backendConfigBytes == null) {
-      builder = BackendConfig.newBuilder();
+    // Get a builder from DeploymentConfig(bytes) or create a new one.
+    DeploymentConfig.Builder builder = null;
+    if (deploymentConfigBytes == null) {
+      builder = DeploymentConfig.newBuilder();
     } else {
-      BackendConfig backendConfig = null;
+      DeploymentConfig deploymentConfig = null;
       try {
-        backendConfig = BackendConfig.parseFrom(backendConfigBytes);
+        deploymentConfig = DeploymentConfig.parseFrom(deploymentConfigBytes);
       } catch (InvalidProtocolBufferException e) {
-        throw new RayServeException("Failed to parse BackendConfig from protobuf bytes.", e);
+        throw new RayServeException("Failed to parse DeploymentConfig from protobuf bytes.", e);
       }
-      if (backendConfig == null) {
-        builder = BackendConfig.newBuilder();
+      if (deploymentConfig == null) {
+        builder = DeploymentConfig.newBuilder();
       } else {
-        builder = BackendConfig.newBuilder(backendConfig);
+        builder = DeploymentConfig.newBuilder(deploymentConfig);
       }
     }
 
@@ -64,22 +64,23 @@ public static BackendConfig parseBackendConfig(byte[] backendConfigBytes) {
       builder.setGracefulShutdownTimeoutS(20);
     }
 
-    if (builder.getBackendLanguage() == BackendLanguage.UNRECOGNIZED) {
+    if (builder.getDeploymentLanguage() == DeploymentLanguage.UNRECOGNIZED) {
       throw new RayServeException(
           LogUtil.format(
               "Unrecognized backend language {}. Backend language must be in {}.",
-              builder.getBackendLanguageValue(),
-              Lists.newArrayList(BackendLanguage.values())));
+              builder.getDeploymentLanguageValue(),
+              Lists.newArrayList(DeploymentLanguage.values())));
     }
 
     return builder.build();
   }
 
-  public static Object parseUserConfig(BackendConfig backendConfig) {
-    if (backendConfig.getUserConfig() == null || backendConfig.getUserConfig().size() == 0) {
+  public static Object parseUserConfig(DeploymentConfig deploymentConfig) {
+    if (deploymentConfig.getUserConfig() == null || deploymentConfig.getUserConfig().size() == 0) {
       return null;
     }
-    return MessagePackSerializer.decode(backendConfig.getUserConfig().toByteArray(), Object.class);
+    return MessagePackSerializer.decode(
+        deploymentConfig.getUserConfig().toByteArray(), Object.class);
   }
 
   public static RequestMetadata parseRequestMetadata(byte[] requestMetadataBytes)
diff --git a/java/serve/src/test/java/io/ray/serve/ProxyActorTest.java b/java/serve/src/test/java/io/ray/serve/ProxyActorTest.java
index 632e2805b962..69f7e0ed987d 100644
--- a/java/serve/src/test/java/io/ray/serve/ProxyActorTest.java
+++ b/java/serve/src/test/java/io/ray/serve/ProxyActorTest.java
@@ -5,7 +5,7 @@
 import io.ray.runtime.serializer.MessagePackSerializer;
 import io.ray.serve.api.Serve;
 import io.ray.serve.generated.ActorSet;
-import io.ray.serve.generated.BackendConfig;
+import io.ray.serve.generated.DeploymentConfig;
 import io.ray.serve.generated.DeploymentVersion;
 import io.ray.serve.generated.EndpointInfo;
 import io.ray.serve.util.CommonUtil;
@@ -51,7 +51,7 @@ public void test() throws IOException {
 
       // Replica
       DeploymentInfo deploymentInfo = new DeploymentInfo();
-      deploymentInfo.setBackendConfig(BackendConfig.newBuilder().build().toByteArray());
+      deploymentInfo.setDeploymentConfig(DeploymentConfig.newBuilder().build().toByteArray());
       deploymentInfo.setDeploymentVersion(
           DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray());
       deploymentInfo.setReplicaConfig(
diff --git a/java/serve/src/test/java/io/ray/serve/RayServeHandleTest.java b/java/serve/src/test/java/io/ray/serve/RayServeHandleTest.java
index ae2a6e22dddd..243ea1d5657b 100644
--- a/java/serve/src/test/java/io/ray/serve/RayServeHandleTest.java
+++ b/java/serve/src/test/java/io/ray/serve/RayServeHandleTest.java
@@ -5,8 +5,8 @@
 import io.ray.api.Ray;
 import io.ray.runtime.serializer.MessagePackSerializer;
 import io.ray.serve.generated.ActorSet;
-import io.ray.serve.generated.BackendConfig;
-import io.ray.serve.generated.BackendLanguage;
+import io.ray.serve.generated.DeploymentConfig;
+import io.ray.serve.generated.DeploymentLanguage;
 import io.ray.serve.generated.DeploymentVersion;
 import java.util.HashMap;
 import org.testng.Assert;
@@ -31,15 +31,15 @@ public void test() {
           Ray.actor(DummyServeController::new).setName(controllerName).remote();
 
       // Replica
-      BackendConfig.Builder backendConfigBuilder = BackendConfig.newBuilder();
-      backendConfigBuilder.setBackendLanguage(BackendLanguage.JAVA);
-      byte[] backendConfigBytes = backendConfigBuilder.build().toByteArray();
+      DeploymentConfig.Builder deploymentConfigBuilder = DeploymentConfig.newBuilder();
+      deploymentConfigBuilder.setDeploymentLanguage(DeploymentLanguage.JAVA);
+      byte[] deploymentConfigBytes = deploymentConfigBuilder.build().toByteArray();
 
       Object[] initArgs = new Object[] {backendTag, replicaTag, controllerName, new Object()};
       byte[] initArgsBytes = MessagePackSerializer.encode(initArgs).getLeft();
 
       DeploymentInfo deploymentInfo = new DeploymentInfo();
-      deploymentInfo.setBackendConfig(backendConfigBytes);
+      deploymentInfo.setDeploymentConfig(deploymentConfigBytes);
       deploymentInfo.setDeploymentVersion(
           DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray());
       deploymentInfo.setReplicaConfig(
diff --git a/java/serve/src/test/java/io/ray/serve/RayServeReplicaTest.java b/java/serve/src/test/java/io/ray/serve/RayServeReplicaTest.java
index 7c0e61b95221..7e9d5d5e5577 100644
--- a/java/serve/src/test/java/io/ray/serve/RayServeReplicaTest.java
+++ b/java/serve/src/test/java/io/ray/serve/RayServeReplicaTest.java
@@ -4,8 +4,8 @@
 import io.ray.api.ObjectRef;
 import io.ray.api.Ray;
 import io.ray.runtime.serializer.MessagePackSerializer;
-import io.ray.serve.generated.BackendConfig;
-import io.ray.serve.generated.BackendLanguage;
+import io.ray.serve.generated.DeploymentConfig;
+import io.ray.serve.generated.DeploymentLanguage;
 import io.ray.serve.generated.DeploymentVersion;
 import io.ray.serve.generated.RequestMetadata;
 import io.ray.serve.generated.RequestWrapper;
@@ -32,14 +32,14 @@ public void test() throws IOException {
       ActorHandle<DummyServeController> controllerHandle =
           Ray.actor(DummyServeController::new).setName(controllerName).remote();
 
-      BackendConfig.Builder backendConfigBuilder = BackendConfig.newBuilder();
-      backendConfigBuilder.setBackendLanguage(BackendLanguage.JAVA);
-      byte[] backendConfigBytes = backendConfigBuilder.build().toByteArray();
+      DeploymentConfig.Builder deploymentConfigBuilder = DeploymentConfig.newBuilder();
+      deploymentConfigBuilder.setDeploymentLanguage(DeploymentLanguage.JAVA);
+      byte[] deploymentConfigBytes = deploymentConfigBuilder.build().toByteArray();
       Object[] initArgs = new Object[] {backendTag, replicaTag, controllerName, new Object()};
       byte[] initArgsBytes = MessagePackSerializer.encode(initArgs).getLeft();
 
       DeploymentInfo deploymentInfo = new DeploymentInfo();
-      deploymentInfo.setBackendConfig(backendConfigBytes);
+      deploymentInfo.setDeploymentConfig(deploymentConfigBytes);
       deploymentInfo.setDeploymentVersion(
           DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray());
       deploymentInfo.setReplicaConfig(
diff --git a/java/serve/src/test/java/io/ray/serve/ReplicaSetTest.java b/java/serve/src/test/java/io/ray/serve/ReplicaSetTest.java
index 972d357dd114..d70edcec3082 100644
--- a/java/serve/src/test/java/io/ray/serve/ReplicaSetTest.java
+++ b/java/serve/src/test/java/io/ray/serve/ReplicaSetTest.java
@@ -5,8 +5,8 @@
 import io.ray.api.Ray;
 import io.ray.runtime.serializer.MessagePackSerializer;
 import io.ray.serve.generated.ActorSet;
-import io.ray.serve.generated.BackendConfig;
-import io.ray.serve.generated.BackendLanguage;
+import io.ray.serve.generated.DeploymentConfig;
+import io.ray.serve.generated.DeploymentLanguage;
 import io.ray.serve.generated.DeploymentVersion;
 import io.ray.serve.generated.RequestMetadata;
 import java.util.HashMap;
@@ -23,7 +23,7 @@ public class ReplicaSetTest {
   @Test
   public void setMaxConcurrentQueriesTest() {
     ReplicaSet replicaSet = new ReplicaSet(backendTag);
-    BackendConfig.Builder builder = BackendConfig.newBuilder();
+    DeploymentConfig.Builder builder = DeploymentConfig.newBuilder();
     builder.setMaxConcurrentQueries(200);
 
     replicaSet.setMaxConcurrentQueries(builder.build());
@@ -58,15 +58,15 @@ public void assignReplicaTest() {
           Ray.actor(DummyServeController::new).setName(controllerName).remote();
 
       // Replica
-      BackendConfig.Builder backendConfigBuilder = BackendConfig.newBuilder();
-      backendConfigBuilder.setBackendLanguage(BackendLanguage.JAVA);
-      byte[] backendConfigBytes = backendConfigBuilder.build().toByteArray();
+      DeploymentConfig.Builder deploymentConfigBuilder = DeploymentConfig.newBuilder();
+      deploymentConfigBuilder.setDeploymentLanguage(DeploymentLanguage.JAVA);
+      byte[] deploymentConfigBytes = deploymentConfigBuilder.build().toByteArray();
 
       Object[] initArgs = new Object[] {backendTag, replicaTag, controllerName, new Object()};
       byte[] initArgsBytes = MessagePackSerializer.encode(initArgs).getLeft();
 
       DeploymentInfo deploymentInfo = new DeploymentInfo();
-      deploymentInfo.setBackendConfig(backendConfigBytes);
+      deploymentInfo.setDeploymentConfig(deploymentConfigBytes);
       deploymentInfo.setDeploymentVersion(
           DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray());
       deploymentInfo.setReplicaConfig(
diff --git a/java/serve/src/test/java/io/ray/serve/RouterTest.java b/java/serve/src/test/java/io/ray/serve/RouterTest.java
index f52f8822eaba..03ee6da4a6ef 100644
--- a/java/serve/src/test/java/io/ray/serve/RouterTest.java
+++ b/java/serve/src/test/java/io/ray/serve/RouterTest.java
@@ -5,8 +5,8 @@
 import io.ray.api.Ray;
 import io.ray.runtime.serializer.MessagePackSerializer;
 import io.ray.serve.generated.ActorSet;
-import io.ray.serve.generated.BackendConfig;
-import io.ray.serve.generated.BackendLanguage;
+import io.ray.serve.generated.DeploymentConfig;
+import io.ray.serve.generated.DeploymentLanguage;
 import io.ray.serve.generated.DeploymentVersion;
 import io.ray.serve.generated.RequestMetadata;
 import java.util.HashMap;
@@ -33,15 +33,15 @@ public void test() {
           Ray.actor(DummyServeController::new).setName(controllerName).remote();
 
       // Replica
-      BackendConfig.Builder backendConfigBuilder = BackendConfig.newBuilder();
-      backendConfigBuilder.setBackendLanguage(BackendLanguage.JAVA);
-      byte[] backendConfigBytes = backendConfigBuilder.build().toByteArray();
+      DeploymentConfig.Builder deploymentConfigBuilder = DeploymentConfig.newBuilder();
+      deploymentConfigBuilder.setDeploymentLanguage(DeploymentLanguage.JAVA);
+      byte[] deploymentConfigBytes = deploymentConfigBuilder.build().toByteArray();
 
       Object[] initArgs = new Object[] {backendTag, replicaTag, controllerName, new Object()};
       byte[] initArgsBytes = MessagePackSerializer.encode(initArgs).getLeft();
 
       DeploymentInfo deploymentInfo = new DeploymentInfo();
-      deploymentInfo.setBackendConfig(backendConfigBytes);
+      deploymentInfo.setDeploymentConfig(deploymentConfigBytes);
       deploymentInfo.setDeploymentVersion(
           DeploymentVersion.newBuilder().setCodeVersion(version).build().toByteArray());
       deploymentInfo.setReplicaConfig(
diff --git a/java/serve/src/test/java/io/ray/serve/poll/LongPollClientTest.java b/java/serve/src/test/java/io/ray/serve/poll/LongPollClientTest.java
index 7ee254806fad..6bda0c277907 100644
--- a/java/serve/src/test/java/io/ray/serve/poll/LongPollClientTest.java
+++ b/java/serve/src/test/java/io/ray/serve/poll/LongPollClientTest.java
@@ -1,7 +1,7 @@
 package io.ray.serve.poll;
 
 import com.google.protobuf.ByteString;
-import io.ray.serve.generated.BackendConfig;
+import io.ray.serve.generated.DeploymentConfig;
 import io.ray.serve.generated.UpdatedObject;
 import java.util.HashMap;
 import java.util.Map;
@@ -19,18 +19,18 @@ public void test() throws Throwable {
     KeyType keyType = new KeyType(LongPollNamespace.BACKEND_CONFIGS, "backendTag");
     Map<KeyType, KeyListener> keyListeners = new HashMap<>();
     keyListeners.put(
-        keyType, (object) -> a[0] = String.valueOf(((BackendConfig) object).getNumReplicas()));
+        keyType, (object) -> a[0] = String.valueOf(((DeploymentConfig) object).getNumReplicas()));
 
     // Initialize LongPollClient.
     LongPollClient longPollClient = new LongPollClient(null, keyListeners);
 
     // Construct updated object.
-    BackendConfig.Builder backendConfig = BackendConfig.newBuilder();
-    backendConfig.setNumReplicas(20);
+    DeploymentConfig.Builder deploymentConfig = DeploymentConfig.newBuilder();
+    deploymentConfig.setNumReplicas(20);
     int snapshotId = 10;
     UpdatedObject.Builder updatedObject = UpdatedObject.newBuilder();
     updatedObject.setSnapshotId(snapshotId);
-    updatedObject.setObjectSnapshot(ByteString.copyFrom(backendConfig.build().toByteArray()));
+    updatedObject.setObjectSnapshot(ByteString.copyFrom(deploymentConfig.build().toByteArray()));
 
     // Process update.
     Map<KeyType, UpdatedObject> updates = new HashMap<>();
@@ -40,8 +40,8 @@ public void test() throws Throwable {
     // Validation.
     Assert.assertEquals(longPollClient.getSnapshotIds().get(keyType).intValue(), snapshotId);
     Assert.assertEquals(
-        ((BackendConfig) longPollClient.getObjectSnapshots().get(keyType)).getNumReplicas(),
-        backendConfig.getNumReplicas());
-    Assert.assertEquals(a[0], String.valueOf(backendConfig.getNumReplicas()));
+        ((DeploymentConfig) longPollClient.getObjectSnapshots().get(keyType)).getNumReplicas(),
+        deploymentConfig.getNumReplicas());
+    Assert.assertEquals(a[0], String.valueOf(deploymentConfig.getNumReplicas()));
   }
 }
diff --git a/python/ray/serve/api.py b/python/ray/serve/api.py
index 06bacdf2b2d1..952e9aee57fa 100644
--- a/python/ray/serve/api.py
+++ b/python/ray/serve/api.py
@@ -17,7 +17,7 @@
 
 from ray.actor import ActorHandle
 from ray.serve.common import BackendInfo, GoalId, ReplicaTag
-from ray.serve.config import (AutoscalingConfig, BackendConfig, HTTPOptions,
+from ray.serve.config import (AutoscalingConfig, DeploymentConfig, HTTPOptions,
                               ReplicaConfig)
 from ray.serve.constants import (DEFAULT_CHECKPOINT_PATH, HTTP_PROXY_TIMEOUT,
                                  SERVE_CONTROLLER_NAME, MAX_CACHED_HANDLES,
@@ -187,18 +187,19 @@ def _wait_for_goal(self,
             return False
 
     @_ensure_connected
-    def deploy(self,
-               name: str,
-               backend_def: Union[Callable, Type[Callable], str],
-               init_args: Tuple[Any],
-               init_kwargs: Dict[Any, Any],
-               ray_actor_options: Optional[Dict] = None,
-               config: Optional[Union[BackendConfig, Dict[str, Any]]] = None,
-               version: Optional[str] = None,
-               prev_version: Optional[str] = None,
-               route_prefix: Optional[str] = None,
-               url: str = "",
-               _blocking: Optional[bool] = True) -> Optional[GoalId]:
+    def deploy(
+            self,
+            name: str,
+            deployment_def: Union[Callable, Type[Callable], str],
+            init_args: Tuple[Any],
+            init_kwargs: Dict[Any, Any],
+            ray_actor_options: Optional[Dict] = None,
+            config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
+            version: Optional[str] = None,
+            prev_version: Optional[str] = None,
+            route_prefix: Optional[str] = None,
+            url: str = "",
+            _blocking: Optional[bool] = True) -> Optional[GoalId]:
         if config is None:
             config = {}
         if ray_actor_options is None:
@@ -212,23 +213,25 @@ def deploy(self,
             ray_actor_options["runtime_env"] = curr_job_env
 
         replica_config = ReplicaConfig(
-            backend_def,
+            deployment_def,
             init_args=init_args,
             init_kwargs=init_kwargs,
             ray_actor_options=ray_actor_options)
 
         if isinstance(config, dict):
-            backend_config = BackendConfig.parse_obj(config)
-        elif isinstance(config, BackendConfig):
-            backend_config = config
+            deployment_config = DeploymentConfig.parse_obj(config)
+        elif isinstance(config, DeploymentConfig):
+            deployment_config = config
         else:
-            raise TypeError("config must be a BackendConfig or a dictionary.")
+            raise TypeError(
+                "config must be a DeploymentConfig or a dictionary.")
 
         goal_id, updating = ray.get(
-            self._controller.deploy.remote(
-                name, backend_config.to_proto_bytes(), replica_config, version,
-                prev_version, route_prefix,
-                ray.get_runtime_context().job_id))
+            self._controller.deploy.remote(name,
+                                           deployment_config.to_proto_bytes(),
+                                           replica_config, version,
+                                           prev_version, route_prefix,
+                                           ray.get_runtime_context().job_id))
 
         tag = f"component=serve deployment={name}"
 
@@ -626,7 +629,7 @@ class Deployment:
     def __init__(self,
                  func_or_class: Callable,
                  name: str,
-                 config: BackendConfig,
+                 config: DeploymentConfig,
                  version: Optional[str] = None,
                  prev_version: Optional[str] = None,
                  init_args: Optional[Tuple[Any]] = None,
@@ -1021,7 +1024,7 @@ class MyDeployment:
         raise ValueError("Manually setting num_replicas is not allowed when "
                          "_autoscaling_config is provided.")
 
-    config = BackendConfig()
+    config = DeploymentConfig()
     if num_replicas is not None:
         config.num_replicas = num_replicas
 
@@ -1085,9 +1088,10 @@ def get_deployment(name: str) -> Deployment:
         raise KeyError(f"Deployment {name} was not found. "
                        "Did you call Deployment.deploy()?")
     return Deployment(
-        cloudpickle.loads(backend_info.replica_config.serialized_backend_def),
+        cloudpickle.loads(
+            backend_info.replica_config.serialized_deployment_def),
         name,
-        backend_info.backend_config,
+        backend_info.deployment_config,
         version=backend_info.version,
         init_args=backend_info.replica_config.init_args,
         init_kwargs=backend_info.replica_config.init_kwargs,
@@ -1109,9 +1113,9 @@ def list_deployments() -> Dict[str, Deployment]:
     for name, (backend_info, route_prefix) in infos.items():
         deployments[name] = Deployment(
             cloudpickle.loads(
-                backend_info.replica_config.serialized_backend_def),
+                backend_info.replica_config.serialized_deployment_def),
             name,
-            backend_info.backend_config,
+            backend_info.deployment_config,
             version=backend_info.version,
             init_args=backend_info.replica_config.init_args,
             init_kwargs=backend_info.replica_config.init_kwargs,
diff --git a/python/ray/serve/backend_state.py b/python/ray/serve/backend_state.py
index 8f23bc3fba75..7b00235f34af 100644
--- a/python/ray/serve/backend_state.py
+++ b/python/ray/serve/backend_state.py
@@ -11,7 +11,7 @@
 from ray.serve.async_goal_manager import AsyncGoalManager
 from ray.serve.common import (BackendInfo, BackendTag, Duration, GoalId,
                               ReplicaTag, ReplicaName, RunningReplicaInfo)
-from ray.serve.config import BackendConfig
+from ray.serve.config import DeploymentConfig
 from ray.serve.constants import (
     CONTROLLER_STARTUP_GRACE_PERIOD_S, SERVE_CONTROLLER_NAME, SERVE_PROXY_NAME,
     MAX_DEPLOYMENT_CONSTRUCTOR_RETRY_COUNT, MAX_NUM_DELETED_DEPLOYMENTS)
@@ -156,9 +156,9 @@ def start(self, backend_info: BackendInfo, version: DeploymentVersion):
         """
         self._actor_resources = backend_info.replica_config.resource_dict
         self._max_concurrent_queries = (
-            backend_info.backend_config.max_concurrent_queries)
+            backend_info.deployment_config.max_concurrent_queries)
         self._graceful_shutdown_timeout_s = (
-            backend_info.backend_config.graceful_shutdown_timeout_s)
+            backend_info.deployment_config.graceful_shutdown_timeout_s)
         if USE_PLACEMENT_GROUP:
             self._placement_group = self.create_placement_group(
                 self._placement_group_name, self._actor_resources)
@@ -177,11 +177,11 @@ def start(self, backend_info: BackendInfo, version: DeploymentVersion):
                 self.backend_tag, self.replica_tag,
                 backend_info.replica_config.init_args,
                 backend_info.replica_config.init_kwargs,
-                backend_info.backend_config.to_proto_bytes(), version,
+                backend_info.deployment_config.to_proto_bytes(), version,
                 self._controller_name, self._detached)
 
         self._ready_obj_ref = self._actor_handle.reconfigure.remote(
-            backend_info.backend_config.user_config)
+            backend_info.deployment_config.user_config)
 
     def update_user_config(self, user_config: Any):
         """
@@ -242,11 +242,11 @@ def check_ready(
             return ReplicaStartupStatus.PENDING, None
         elif len(ready) > 0:
             try:
-                backend_config, version = ray.get(ready)[0]
+                deployment_config, version = ray.get(ready)[0]
                 self._max_concurrent_queries = (
-                    backend_config.max_concurrent_queries)
+                    deployment_config.max_concurrent_queries)
                 self._graceful_shutdown_timeout_s = (
-                    backend_config.graceful_shutdown_timeout_s)
+                    deployment_config.graceful_shutdown_timeout_s)
             except Exception:
                 return ReplicaStartupStatus.FAILED, None
 
@@ -726,11 +726,11 @@ def _set_backend_goal(self, backend_info: Optional[BackendInfo]) -> None:
 
         if backend_info is not None:
             self._target_info = backend_info
-            self._target_replicas = backend_info.backend_config.num_replicas
+            self._target_replicas = backend_info.deployment_config.num_replicas
 
             self._target_version = DeploymentVersion(
                 backend_info.version,
-                user_config=backend_info.backend_config.user_config)
+                user_config=backend_info.deployment_config.user_config)
 
         else:
             self._target_replicas = 0
@@ -746,7 +746,7 @@ def deploy(self,
                backend_info: BackendInfo) -> Tuple[Optional[GoalId], bool]:
         """Deploy the backend.
 
-        If the backend already exists with the same version and BackendConfig,
+        If the backend already exists with the same version and config,
         this is a no-op and returns the GoalId corresponding to the existing
         update if there is one.
 
@@ -760,7 +760,8 @@ def deploy(self,
             # Redeploying should not reset the deployment's start time.
             backend_info.start_time_ms = existing_info.start_time_ms
 
-            if (existing_info.backend_config == backend_info.backend_config
+            if (existing_info.deployment_config ==
+                    backend_info.deployment_config
                     and backend_info.version is not None
                     and existing_info.version == backend_info.version):
                 return self._curr_goal, False
@@ -1291,19 +1292,20 @@ def get_running_replica_infos(
 
         return replicas
 
-    def get_backend_configs(self,
-                            filter_tag: Optional[BackendTag] = None,
-                            include_deleted: Optional[bool] = False
-                            ) -> Dict[BackendTag, BackendConfig]:
-        configs: Dict[BackendTag, BackendConfig] = {}
+    def get_deployment_configs(self,
+                               filter_tag: Optional[BackendTag] = None,
+                               include_deleted: Optional[bool] = False
+                               ) -> Dict[BackendTag, DeploymentConfig]:
+        configs: Dict[BackendTag, DeploymentConfig] = {}
         for backend_tag, backend_state in self._backend_states.items():
             if filter_tag is None or backend_tag == filter_tag:
-                configs[backend_tag] = backend_state.target_info.backend_config
+                configs[
+                    backend_tag] = backend_state.target_info.deployment_config
 
         if include_deleted:
             for backend_tag, info in self._deleted_backend_metadata.items():
                 if filter_tag is None or backend_tag == filter_tag:
-                    configs[backend_tag] = info.backend_config
+                    configs[backend_tag] = info.deployment_config
 
         return configs
 
@@ -1322,7 +1324,7 @@ def deploy_backend(self, backend_tag: BackendTag, backend_info: BackendInfo
                        ) -> Tuple[Optional[GoalId], bool]:
         """Deploy the backend.
 
-        If the backend already exists with the same version and BackendConfig,
+        If the backend already exists with the same version and config,
         this is a no-op and returns the GoalId corresponding to the existing
         update if there is one.
 
diff --git a/python/ray/serve/common.py b/python/ray/serve/common.py
index 3c236a668d26..eb0a5dead6ea 100644
--- a/python/ray/serve/common.py
+++ b/python/ray/serve/common.py
@@ -5,7 +5,7 @@
 from uuid import UUID
 
 from ray.actor import ActorClass, ActorHandle
-from ray.serve.config import BackendConfig, ReplicaConfig
+from ray.serve.config import DeploymentConfig, ReplicaConfig
 from ray.serve.autoscaling_policy import AutoscalingPolicy
 
 BackendTag = str
@@ -23,7 +23,7 @@ class EndpointInfo:
 
 class BackendInfo:
     def __init__(self,
-                 backend_config: BackendConfig,
+                 deployment_config: DeploymentConfig,
                  replica_config: ReplicaConfig,
                  start_time_ms: int,
                  actor_def: Optional[ActorClass] = None,
@@ -31,7 +31,7 @@ def __init__(self,
                  deployer_job_id: "Optional[ray._raylet.JobID]" = None,
                  end_time_ms: Optional[int] = None,
                  autoscaling_policy: Optional[AutoscalingPolicy] = None):
-        self.backend_config = backend_config
+        self.deployment_config = deployment_config
         self.replica_config = replica_config
         # The time when .deploy() was first called for this deployment.
         self.start_time_ms = start_time_ms
diff --git a/python/ray/serve/config.py b/python/ray/serve/config.py
index 7f010d3ff37e..60a1e1344dd8 100644
--- a/python/ray/serve/config.py
+++ b/python/ray/serve/config.py
@@ -7,10 +7,10 @@
 from google.protobuf.json_format import MessageToDict
 from pydantic import BaseModel, NonNegativeFloat, PositiveInt, validator
 from ray.serve.constants import DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT
-from ray.serve.generated.serve_pb2 import (BackendConfig as BackendConfigProto,
-                                           AutoscalingConfig as
-                                           AutoscalingConfigProto)
-from ray.serve.generated.serve_pb2 import BackendLanguage
+from ray.serve.generated.serve_pb2 import (
+    DeploymentConfig as DeploymentConfigProto, AutoscalingConfig as
+    AutoscalingConfigProto)
+from ray.serve.generated.serve_pb2 import DeploymentLanguage
 
 from ray import cloudpickle as cloudpickle
 
@@ -56,21 +56,21 @@ class AutoscalingConfig(BaseModel):
     # TODO(architkulkarni): Add pydantic validation.  E.g. max_replicas>=min
 
 
-class BackendConfig(BaseModel):
-    """Configuration options for a backend, to be set by the user.
+class DeploymentConfig(BaseModel):
+    """Configuration options for a deployment, to be set by the user.
 
     Args:
         num_replicas (Optional[int]): The number of processes to start up that
-            will handle requests to this backend. Defaults to 1.
+            will handle requests to this deployment. Defaults to 1.
         max_concurrent_queries (Optional[int]): The maximum number of queries
-            that will be sent to a replica of this backend without receiving a
-            response. Defaults to 100.
+            that will be sent to a replica of this deployment without receiving
+            a response. Defaults to 100.
         user_config (Optional[Any]): Arguments to pass to the reconfigure
-            method of the backend. The reconfigure method is called if
+            method of the deployment. The reconfigure method is called if
             user_config is not None.
         graceful_shutdown_wait_loop_s (Optional[float]): Duration
-            that backend workers will wait until there is no more work to be
-            done before shutting down. Defaults to 2s.
+            that deployment replicas will wait until there is no more work to
+            be done before shutting down. Defaults to 2s.
         graceful_shutdown_timeout_s (Optional[float]):
             Controller waits for this duration to forcefully kill the replica
             for shutdown. Defaults to 20s.
@@ -107,15 +107,15 @@ def to_proto_bytes(self):
         if data.get("autoscaling_config"):
             data["autoscaling_config"] = AutoscalingConfigProto(
                 **data["autoscaling_config"])
-        return BackendConfigProto(
+        return DeploymentConfigProto(
             is_cross_language=False,
-            backend_language=BackendLanguage.PYTHON,
+            deployment_language=DeploymentLanguage.PYTHON,
             **data,
         ).SerializeToString()
 
     @classmethod
     def from_proto_bytes(cls, proto_bytes: bytes):
-        proto = BackendConfigProto.FromString(proto_bytes)
+        proto = DeploymentConfigProto.FromString(proto_bytes)
         data = MessageToDict(
             proto,
             including_default_value_fields=True,
@@ -131,37 +131,36 @@ def from_proto_bytes(cls, proto_bytes: bytes):
 
         # Delete fields which are only used in protobuf, not in Python.
         del data["is_cross_language"]
-        del data["backend_language"]
+        del data["deployment_language"]
 
         return cls(**data)
 
 
 class ReplicaConfig:
     def __init__(self,
-                 backend_def: Callable,
+                 deployment_def: Callable,
                  init_args: Optional[Tuple[Any]] = None,
                  init_kwargs: Optional[Dict[Any, Any]] = None,
                  ray_actor_options=None):
-        # Validate that backend_def is an import path, function, or class.
-        if isinstance(backend_def, str):
-            self.func_or_class_name = backend_def
-            pass
-        elif inspect.isfunction(backend_def):
-            self.func_or_class_name = backend_def.__name__
+        # Validate that deployment_def is an import path, function, or class.
+        if isinstance(deployment_def, str):
+            self.func_or_class_name = deployment_def
+        elif inspect.isfunction(deployment_def):
+            self.func_or_class_name = deployment_def.__name__
             if init_args:
                 raise ValueError(
-                    "init_args not supported for function backend.")
+                    "init_args not supported for function deployments.")
             if init_kwargs:
                 raise ValueError(
-                    "init_kwargs not supported for function backend.")
-        elif inspect.isclass(backend_def):
-            self.func_or_class_name = backend_def.__name__
+                    "init_kwargs not supported for function deployments.")
+        elif inspect.isclass(deployment_def):
+            self.func_or_class_name = deployment_def.__name__
         else:
             raise TypeError(
-                "Backend must be an import path, function or class, it is {}.".
-                format(type(backend_def)))
+                "Deployment must be a function or class, it is {}.".format(
+                    type(deployment_def)))
 
-        self.serialized_backend_def = cloudpickle.dumps(backend_def)
+        self.serialized_deployment_def = cloudpickle.dumps(deployment_def)
         self.init_args = init_args if init_args is not None else ()
         self.init_kwargs = init_kwargs if init_kwargs is not None else {}
         if ray_actor_options is None:
@@ -175,7 +174,7 @@ def __init__(self,
     def _validate(self):
 
         if "placement_group" in self.ray_actor_options:
-            raise ValueError("Providing placement_group for backend actors "
+            raise ValueError("Providing placement_group for deployment actors "
                              "is not currently supported.")
 
         if not isinstance(self.ray_actor_options, dict):
diff --git a/python/ray/serve/controller.py b/python/ray/serve/controller.py
index e1dd906a398a..cbebddafa0c4 100644
--- a/python/ray/serve/controller.py
+++ b/python/ray/serve/controller.py
@@ -20,7 +20,7 @@
     NodeId,
     RunningReplicaInfo,
 )
-from ray.serve.config import BackendConfig, HTTPOptions, ReplicaConfig
+from ray.serve.config import DeploymentConfig, HTTPOptions, ReplicaConfig
 from ray.serve.constants import CONTROL_LOOP_PERIOD_S, SERVE_ROOT_URL_ENV_KEY
 from ray.serve.endpoint_state import EndpointState
 from ray.serve.http_state import HTTPState
@@ -142,7 +142,7 @@ def autoscale(self) -> None:
         """Updates autoscaling deployments with calculated num_replicas."""
         for deployment_name, (backend_info,
                               route_prefix) in self.list_deployments().items():
-            backend_config = backend_info.backend_config
+            deployment_config = backend_info.deployment_config
             autoscaling_policy = backend_info.autoscaling_policy
 
             if autoscaling_policy is None:
@@ -166,16 +166,16 @@ def autoscale(self) -> None:
             if len(current_num_ongoing_requests) == 0:
                 continue
 
-            new_backend_config = backend_config.copy()
+            new_deployment_config = deployment_config.copy()
 
             decision_num_replicas = (
                 autoscaling_policy.get_decision_num_replicas(
                     current_num_ongoing_requests=current_num_ongoing_requests,
-                    curr_target_num_replicas=backend_config.num_replicas))
-            new_backend_config.num_replicas = decision_num_replicas
+                    curr_target_num_replicas=deployment_config.num_replicas))
+            new_deployment_config.num_replicas = decision_num_replicas
 
             new_backend_info = copy(backend_info)
-            new_backend_info.backend_config = new_backend_config
+            new_backend_info.deployment_config = new_deployment_config
 
             goal_id, updating = self.backend_state_manager.deploy_backend(
                 deployment_name, new_backend_info)
@@ -275,7 +275,7 @@ async def shutdown(self) -> List[GoalId]:
 
     def deploy(self,
                name: str,
-               backend_config_proto_bytes: bytes,
+               deployment_config_proto_bytes: bytes,
                replica_config: ReplicaConfig,
                version: Optional[str],
                prev_version: Optional[str],
@@ -285,8 +285,8 @@ def deploy(self,
         if route_prefix is not None:
             assert route_prefix.startswith("/")
 
-        backend_config = BackendConfig.from_proto_bytes(
-            backend_config_proto_bytes)
+        deployment_config = DeploymentConfig.from_proto_bytes(
+            deployment_config_proto_bytes)
 
         if prev_version is not None:
             existing_backend_info = self.backend_state_manager.get_backend(
@@ -301,10 +301,10 @@ def deploy(self,
                                  "does not match with the existing "
                                  f"version '{existing_backend_info.version}'.")
 
-        autoscaling_config = backend_config.autoscaling_config
+        autoscaling_config = deployment_config.autoscaling_config
         if autoscaling_config is not None:
             # TODO: is this the desired behaviour? Should this be a setting?
-            backend_config.num_replicas = autoscaling_config.min_replicas
+            deployment_config.num_replicas = autoscaling_config.min_replicas
 
             autoscaling_policy = BasicAutoscalingPolicy(autoscaling_config)
         else:
@@ -312,10 +312,10 @@ def deploy(self,
 
         backend_info = BackendInfo(
             actor_def=ray.remote(
-                create_replica_wrapper(name,
-                                       replica_config.serialized_backend_def)),
+                create_replica_wrapper(
+                    name, replica_config.serialized_deployment_def)),
             version=version,
-            backend_config=backend_config,
+            deployment_config=deployment_config,
             replica_config=replica_config,
             deployer_job_id=deployer_job_id,
             start_time_ms=int(time.time() * 1000),
@@ -373,6 +373,6 @@ def list_deployments(self, include_deleted: Optional[bool] = False
             name: (self.backend_state_manager.get_backend(
                 name, include_deleted=include_deleted),
                    self.endpoint_state.get_endpoint_route(name))
-            for name in self.backend_state_manager.get_backend_configs(
+            for name in self.backend_state_manager.get_deployment_configs(
                 include_deleted=include_deleted)
         }
diff --git a/python/ray/serve/replica.py b/python/ray/serve/replica.py
index a85ed390b71c..45db316106a7 100644
--- a/python/ray/serve/replica.py
+++ b/python/ray/serve/replica.py
@@ -15,7 +15,7 @@
 
 from ray.serve.autoscaling_metrics import start_metrics_pusher
 from ray.serve.common import BackendTag, ReplicaTag
-from ray.serve.config import BackendConfig
+from ray.serve.config import DeploymentConfig
 from ray.serve.http_util import ASGIHTTPSender
 from ray.serve.utils import parse_request_item, _get_logger
 from ray.serve.exceptions import RayServeException
@@ -31,30 +31,30 @@
 logger = _get_logger()
 
 
-def create_replica_wrapper(name: str, serialized_backend_def: bytes):
+def create_replica_wrapper(name: str, serialized_deployment_def: bytes):
     """Creates a replica class wrapping the provided function or class.
 
     This approach is picked over inheritance to avoid conflict between user
     provided class and the RayServeReplica class.
     """
-    serialized_backend_def = serialized_backend_def
+    serialized_deployment_def = serialized_deployment_def
 
     # TODO(architkulkarni): Add type hints after upgrading cloudpickle
     class RayServeWrappedReplica(object):
         async def __init__(self, backend_tag, replica_tag, init_args,
-                           init_kwargs, backend_config_proto_bytes: bytes,
+                           init_kwargs, deployment_config_proto_bytes: bytes,
                            version: DeploymentVersion, controller_name: str,
                            detached: bool):
-            backend = cloudpickle.loads(serialized_backend_def)
-            backend_config = BackendConfig.from_proto_bytes(
-                backend_config_proto_bytes)
+            backend = cloudpickle.loads(serialized_deployment_def)
+            deployment_config = DeploymentConfig.from_proto_bytes(
+                deployment_config_proto_bytes)
 
             if inspect.isfunction(backend):
                 is_function = True
             elif inspect.isclass(backend):
                 is_function = False
             else:
-                assert False, ("backend_def must be function, class, or "
+                assert False, ("deployment_def must be function, class, or "
                                "corresponding import path.")
 
             # Set the controller name so that serve.connect() in the user's
@@ -85,10 +85,10 @@ async def __init__(self, backend_tag, replica_tag, init_args,
                 detached)
             controller_handle = ray.get_actor(
                 controller_name, namespace=controller_namespace)
-            self.backend = RayServeReplica(_callable, backend_tag, replica_tag,
-                                           backend_config,
-                                           backend_config.user_config, version,
-                                           is_function, controller_handle)
+            self.backend = RayServeReplica(
+                _callable, backend_tag, replica_tag, deployment_config,
+                deployment_config.user_config, version, is_function,
+                controller_handle)
 
             # asyncio.Event used to signal that the replica is shutting down.
             self.shutdown_event = asyncio.Event()
@@ -109,14 +109,14 @@ async def handle_request(
             return await self.backend.handle_request(query)
 
         async def reconfigure(self, user_config: Optional[Any] = None
-                              ) -> Tuple[BackendConfig, DeploymentVersion]:
+                              ) -> Tuple[DeploymentConfig, DeploymentVersion]:
             if user_config is not None:
                 await self.backend.reconfigure(user_config)
 
             return self.get_metadata()
 
-        def get_metadata(self) -> Tuple[BackendConfig, DeploymentVersion]:
-            return self.backend.backend_config, self.backend.version
+        def get_metadata(self) -> Tuple[DeploymentConfig, DeploymentVersion]:
+            return self.backend.deployment_config, self.backend.version
 
         async def prepare_for_shutdown(self):
             self.shutdown_event.set()
@@ -146,10 +146,10 @@ class RayServeReplica:
     """Handles requests with the provided callable."""
 
     def __init__(self, _callable: Callable, backend_tag: BackendTag,
-                 replica_tag: ReplicaTag, backend_config: BackendConfig,
+                 replica_tag: ReplicaTag, deployment_config: DeploymentConfig,
                  user_config: Any, version: DeploymentVersion,
                  is_function: bool, controller_handle: ActorHandle) -> None:
-        self.backend_config = backend_config
+        self.deployment_config = deployment_config
         self.backend_tag = backend_tag
         self.replica_tag = replica_tag
         self.callable = _callable
@@ -211,10 +211,10 @@ def __init__(self, _callable: Callable, backend_tag: BackendTag,
         self.restart_counter.inc()
 
         self._shutdown_wait_loop_s = (
-            backend_config.graceful_shutdown_wait_loop_s)
+            deployment_config.graceful_shutdown_wait_loop_s)
 
-        if backend_config.autoscaling_config:
-            config = backend_config.autoscaling_config
+        if deployment_config.autoscaling_config:
+            config = deployment_config.autoscaling_config
             start_metrics_pusher(
                 interval_s=config.metrics_interval_s,
                 collection_callback=self._collect_autoscaling_metrics,
@@ -319,7 +319,8 @@ async def reconfigure(self, user_config: Any):
         self.version = DeploymentVersion(
             self.version.code_version, user_config=user_config)
         if self.is_function:
-            raise ValueError("backend_def must be a class to use user_config")
+            raise ValueError(
+                "deployment_def must be a class to use user_config")
         elif not hasattr(self.callable, BACKEND_RECONFIGURE_METHOD):
             raise RayServeException("user_config specified but backend " +
                                     self.backend_tag + " missing " +
diff --git a/python/ray/serve/tests/test_backend_state.py b/python/ray/serve/tests/test_backend_state.py
index 74a170a6f1c6..687382945249 100644
--- a/python/ray/serve/tests/test_backend_state.py
+++ b/python/ray/serve/tests/test_backend_state.py
@@ -8,7 +8,7 @@
 
 from ray.actor import ActorHandle
 from ray.serve.common import (
-    BackendConfig,
+    DeploymentConfig,
     BackendInfo,
     BackendTag,
     ReplicaConfig,
@@ -130,7 +130,7 @@ def available_resources(self) -> Dict[str, float]:
     def graceful_stop(self) -> None:
         assert self.started
         self.stopped = True
-        return self.backend_info.backend_config.graceful_shutdown_timeout_s
+        return self.backend_info.deployment_config.graceful_shutdown_timeout_s
 
     def check_stopped(self) -> bool:
         return self.done_stopping
@@ -154,7 +154,7 @@ def backend_info(version: Optional[str] = None,
         actor_def=None,
         version=version,
         start_time_ms=0,
-        backend_config=BackendConfig(
+        deployment_config=DeploymentConfig(
             num_replicas=num_replicas, user_config=user_config, **config_opts),
         replica_config=ReplicaConfig(lambda x: x))
 
@@ -163,7 +163,8 @@ def backend_info(version: Optional[str] = None,
     else:
         code_version = get_random_letters()
 
-    version = DeploymentVersion(code_version, info.backend_config.user_config)
+    version = DeploymentVersion(code_version,
+                                info.deployment_config.user_config)
 
     return info, version
 
diff --git a/python/ray/serve/tests/test_config.py b/python/ray/serve/tests/test_config.py
index 505147055e34..f8e96487434d 100644
--- a/python/ray/serve/tests/test_config.py
+++ b/python/ray/serve/tests/test_config.py
@@ -1,29 +1,29 @@
 import pytest
 from pydantic import ValidationError
 
-from ray.serve.config import (BackendConfig, DeploymentMode, HTTPOptions,
+from ray.serve.config import (DeploymentConfig, DeploymentMode, HTTPOptions,
                               ReplicaConfig)
 from ray.serve.config import AutoscalingConfig
 
 
-def test_backend_config_validation():
+def test_deployment_config_validation():
     # Test unknown key.
     with pytest.raises(ValidationError):
-        BackendConfig(unknown_key=-1)
+        DeploymentConfig(unknown_key=-1)
 
     # Test num_replicas validation.
-    BackendConfig(num_replicas=1)
+    DeploymentConfig(num_replicas=1)
     with pytest.raises(ValidationError, match="type_error"):
-        BackendConfig(num_replicas="hello")
+        DeploymentConfig(num_replicas="hello")
     with pytest.raises(ValidationError, match="value_error"):
-        BackendConfig(num_replicas=-1)
+        DeploymentConfig(num_replicas=-1)
 
     # Test dynamic default for max_concurrent_queries.
-    assert BackendConfig().max_concurrent_queries == 100
+    assert DeploymentConfig().max_concurrent_queries == 100
 
 
-def test_backend_config_update():
-    b = BackendConfig(num_replicas=1, max_concurrent_queries=1)
+def test_deployment_config_update():
+    b = DeploymentConfig(num_replicas=1, max_concurrent_queries=1)
 
     # Test updating a key works.
     b.num_replicas = 2
@@ -108,18 +108,18 @@ def test_http_options():
 
 def test_with_proto():
     # Test roundtrip
-    config = BackendConfig(num_replicas=100, max_concurrent_queries=16)
-    assert config == BackendConfig.from_proto_bytes(config.to_proto_bytes())
+    config = DeploymentConfig(num_replicas=100, max_concurrent_queries=16)
+    assert config == DeploymentConfig.from_proto_bytes(config.to_proto_bytes())
 
     # Test user_config object
-    config = BackendConfig(user_config={"python": ("native", ["objects"])})
-    assert config == BackendConfig.from_proto_bytes(config.to_proto_bytes())
+    config = DeploymentConfig(user_config={"python": ("native", ["objects"])})
+    assert config == DeploymentConfig.from_proto_bytes(config.to_proto_bytes())
 
 
 def test_zero_default_proto():
     # Test that options set to zero (protobuf default value) still retain their
     # original value after being serialized and deserialized.
-    config = BackendConfig(
+    config = DeploymentConfig(
         autoscaling_config={
             "min_replicas": 1,
             "max_replicas": 2,
@@ -127,7 +127,7 @@ def test_zero_default_proto():
             "downscale_delay_s": 0
         })
     serialized_config = config.to_proto_bytes()
-    deserialized_config = BackendConfig.from_proto_bytes(serialized_config)
+    deserialized_config = DeploymentConfig.from_proto_bytes(serialized_config)
     new_delay_s = deserialized_config.autoscaling_config.downscale_delay_s
     assert new_delay_s == 0
 
diff --git a/src/ray/protobuf/serve.proto b/src/ray/protobuf/serve.proto
index 2cadc2128dfb..5bfa6775136d 100644
--- a/src/ray/protobuf/serve.proto
+++ b/src/ray/protobuf/serve.proto
@@ -51,21 +51,21 @@ message AutoscalingConfig {
   double upscale_delay_s = 8;
 }
 
-// Configuration options for a backend, to be set by the user.
-message BackendConfig {
-  // The number of processes to start up that will handle requests to this backend.
+// Configuration options for a deployment, to be set by the user.
+message DeploymentConfig {
+  // The number of processes to start up that will handle requests to this deployment.
   // Defaults to 1.
   int32 num_replicas = 1;
 
-  // The maximum number of queries that will be sent to a replica of this backend without
+  // The maximum number of queries that will be sent to a replica of this deployment without
   // receiving a response. Defaults to 100.
   int32 max_concurrent_queries = 2;
 
-  // Arguments to pass to the reconfigure method of the backend. The reconfigure method is
+  // Arguments to pass to the reconfigure method of the deployment. The reconfigure method is
   // called if user_config is not None.
   bytes user_config = 3;
 
-  // Duration that backend workers will wait until there is no more work to be done before
+  // Duration that deployment replicas will wait until there is no more work to be done before
   // shutting down. Defaults to 2s.
   double graceful_shutdown_wait_loop_s = 4;
 
@@ -73,18 +73,18 @@ message BackendConfig {
   // Defaults to 20s.
   double graceful_shutdown_timeout_s = 5;
 
-  // Is the construction of backend is cross language?
+  // Is the construction of deployment is cross language?
   bool is_cross_language = 6;
 
-  // The backend's programming language.
-  BackendLanguage backend_language = 7;
+  // The deployment's programming language.
+  DeploymentLanguage deployment_language = 7;
 
-  // The backend's autoscaling configuration.
+  // The deployment's autoscaling configuration.
   AutoscalingConfig autoscaling_config = 8;
 }
 
 // Backend language.
-enum BackendLanguage {
+enum DeploymentLanguage {
   PYTHON = 0;
   JAVA = 1;
 }

From 241573d9994867453a773a09617523dbc40f3ff7 Mon Sep 17 00:00:00 2001
From: Ed Oakes <ed.nmi.oakes@gmail.com>
Date: Sun, 31 Oct 2021 16:14:46 -0500
Subject: [PATCH 2/2] fix lint

---
 src/ray/protobuf/serve.proto | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/ray/protobuf/serve.proto b/src/ray/protobuf/serve.proto
index 5bfa6775136d..20f451f1129d 100644
--- a/src/ray/protobuf/serve.proto
+++ b/src/ray/protobuf/serve.proto
@@ -57,16 +57,16 @@ message DeploymentConfig {
   // Defaults to 1.
   int32 num_replicas = 1;
 
-  // The maximum number of queries that will be sent to a replica of this deployment without
-  // receiving a response. Defaults to 100.
+  // The maximum number of queries that will be sent to a replica of this deployment
+  // without receiving a response. Defaults to 100.
   int32 max_concurrent_queries = 2;
 
-  // Arguments to pass to the reconfigure method of the deployment. The reconfigure method is
-  // called if user_config is not None.
+  // Arguments to pass to the reconfigure method of the deployment. The reconfigure method
+  // is called if user_config is not None.
   bytes user_config = 3;
 
-  // Duration that deployment replicas will wait until there is no more work to be done before
-  // shutting down. Defaults to 2s.
+  // Duration that deployment replicas will wait until there is no more work to be done
+  // before shutting down. Defaults to 2s.
   double graceful_shutdown_wait_loop_s = 4;
 
   // Controller waits for this duration to forcefully kill the replica for shutdown.