Maintaining current behavior

microsoft · Jun 28, 2023 · c21c31b
1 parent 20f0878
commit c21c31b
Show file tree

Hide file tree

Showing 5 changed files with 32 additions and 22 deletions.
diff --git a/mii/config.py b/mii/config.py
@@ -107,6 +107,7 @@ class Config:
 
 
 class ReplicaConfig(BaseModel):
+    deployment_name: str = ""
     hostname: str = ""
     tensor_parallel_ports: List[int] = []
     torch_dist_port: int = None

diff --git a/mii/deployment.py b/mii/deployment.py
@@ -16,8 +16,15 @@
 from .config import ReplicaConfig, LoadBalancerConfig
 
 
-def deploy(deployment_tag,
-           deployments,
+def deploy(task=None,
+           model=None,
+           deployment_name=None,
+           enable_deepspeed=True,
+           enable_zero=False,
+           ds_config=None,
+           mii_config={},
+           deployment_tag=None,
+           deployments=[],
            deployment_type=DeploymentType.LOCAL,
            model_path=None):
     """Deploy a task using specified model. For usage examples see:
@@ -60,6 +67,13 @@ def deploy(deployment_tag,
         If deployment_type is `LOCAL`, returns just the name of the deployment that can be used to create a query handle using `mii.mii_query_handle(deployment_name)`
 
     """
+    if len(deployments == 0):
+        assert model is not None and task is not None and deployment_name is not None, "model, task, and deployment name must be set to deploy sigular model"
+        deployments = [Deployment(deployment_name, task, model, enable_deepspeed, enable_zero, None, mii_config, ds_config, version)]
+        deployment_tag = deployment_name + "_tag"
+    else:
+        assert deployment_tag is not None, "deployment_tag must be set to deploy multiple models"
+
     mii.multi_model_deployments[deployment_tag] = deployments
     ports = set()
     # parse and validate mii config

diff --git a/mii/models/score/generate.py b/mii/models/score/generate.py
@@ -13,7 +13,7 @@ def create_score_file(deployment_tag,
                       deployment_type,
                       deployments,
                       model_path,
-                      lb_config)
+                      lb_config):
 
     config_dict = {}
     config_dict[mii.constants.MODEL_PATH_KEY] = model_path
@@ -46,7 +46,7 @@ def create_score_file(deployment_tag,
     source_with_config += f"configs = {pprint.pformat(config_dict, indent=4)}"
 
     with open(generated_score_path(deployment_tag, deployment_type), "w") as fd:
-        fd.write(source_with_config):
+        fd.write(source_with_config)
         fd.write("\n")
 
 

diff --git a/mii/models/score/score_template.py b/mii/models/score/score_template.py
@@ -16,6 +16,8 @@
 
 def init():
     model_path = mii.utils.full_model_path(configs[mii.constants.MODEL_PATH_KEY])
+    deployment_tag = configs[mii.constants.DEPLOYMENT_TAG_KEY]
+    deployments = mii.multi_model_deployments[deployment_tag]
 
     deployment_name = configs[mii.constants.DEPLOYMENT_NAME_KEY]
     model_name = configs[mii.constants.MODEL_NAME_KEY]

diff --git a/mii/server.py b/mii/server.py
@@ -50,13 +50,9 @@ def __init__(self,
                 f.write(f"localhost slots={num_gpu}")
             mii.configs.hostfile = hostfile
 
-        processes = self._initialize_service(deployment_name,
-                                             model_name,
+        processes = self._initialize_service(deployment_tag,
+                                             deployments,
                                              model_path,
-                                             ds_optimize,
-                                             ds_zero,
-                                             ds_config,
-                                             mii_configs,
                                              lb_config)
         self._wait_until_server_is_live(processes, lb_config.replica_configs)
 
@@ -273,13 +269,9 @@ def _launch_deepspeed(self,
                                            ds_launch_str=ds_launch_str)
 
     def _initialize_service(self,
-                            deployment_name,
-                            model_name,
+                            deployment_tag,
+                            deployments,
                             model_path,
-                            ds_optimize,
-                            ds_zero,
-                            ds_config,
-                            mii_configs,
                             lb_config):
 
         processes = []
@@ -290,19 +282,20 @@ def _initialize_service(self,
 
         # Start replica instances
         for i, repl_config in enumerate(lb_config.replica_configs):
+            name = repl_config.deployment_name
             hostfile = tempfile.NamedTemporaryFile(delete=False)
             hostfile.write(
                 f'{repl_config.hostname} slots={max(host_gpus[repl_config.hostname])+1}\n'
                 .encode())
             processes.append(
                 self._launch_deepspeed(
-                    deployment_name,
-                    model_name,
+                    name,
+                    deployments[name].model,
                     model_path,
-                    ds_optimize,
-                    ds_zero,
-                    ds_config,
-                    mii_configs,
+                    deployments[name].enable_deepspeed,
+                    deployments[name].enable_zero,
+                    deployments[name].ds_config,
+                    deployments[name].mii_configs,
                     hostfile.name,
                     repl_config.hostname,
                     repl_config.tensor_parallel_ports[0],