Add Non-persistent deployment type #197

Merged 48 commits on Jun 22, 2023. The changes shown below are from the first 14 commits.

Commits (48)
a8064ac  Update constants.py (TosinSeg, May 31, 2023)
ef8a774  Adding persistent model global variable (TosinSeg, Jun 1, 2023)
b54034e  Storing persistent model as a dictionary (TosinSeg, Jun 1, 2023)
c889580  Non-persistent updates (TosinSeg, Jun 2, 2023)
dd65681  Update constants.py (TosinSeg, May 31, 2023)
229095c  Adding persistent model global variable (TosinSeg, Jun 1, 2023)
b36c500  Storing persistent model as a dictionary (TosinSeg, Jun 1, 2023)
e85f63b  Non-persistent updates (TosinSeg, Jun 2, 2023)
e4e618d  merged main branch (Jun 6, 2023)
eeba401  Reformatting structure of the mii.persistent_model dict (Jun 6, 2023)
41eaba5  Reformatting structure of the mii.persistent_model dict (TosinSeg, Jun 6, 2023)
a44095e  Merge branch 'Non-persistent-deployment' of https://github.com/TosinS… (TosinSeg, Jun 6, 2023)
6f09d57  Added tests for Non-persistent-deployment (Jun 6, 2023)
c00b388  New test file (Should have been on the last commit) (Jun 6, 2023)
efb45b1  Parsing query for conversational task (Jun 7, 2023)
66d4483  Initial non_persistent test refactoring (Jun 7, 2023)
08396ef  Fixing import statement (Jun 7, 2023)
aaa5f61  Fixed kwargs in query (Jun 8, 2023)
383da4c  Fixed fill-mask query error (Jun 8, 2023)
7ad8987  Defining Kwargs (Jun 8, 2023)
965e112  Added termination logic for non_persistent_model (Jun 8, 2023)
ceb7da1  Fixing args parameter (Jun 8, 2023)
a54cb66  Reverting run_inference change (Jun 9, 2023)
49c11dc  Fixing arguments for run_inference function (Jun 9, 2023)
4ae1dd8  Fixing conversational and fill-mask queries for non_persistent_deploy… (Jun 9, 2023)
289250d  Refactored unit tests (Jun 9, 2023)
56d647d  Refactoring Conversation task args (Jun 10, 2023)
91e8845  Fixing args issues in create_conversation() (Jun 10, 2023)
644c57f  refactor args to remove is_non_persistent flag (mrwyattii, Jun 13, 2023)
0e32ffc  refactor conversation creation (mrwyattii, Jun 13, 2023)
eaaeac4  refactoring run_inference calls (TosinSeg, Jun 13, 2023)
bbd5860  Fixing misspelled key (TosinSeg, Jun 14, 2023)
31bf6f7  refactoring tests (TosinSeg, Jun 15, 2023)
7d93d1d  Fixing getenv() call (TosinSeg, Jun 15, 2023)
e300fa7  Fixing create_conversation function signature (TosinSeg, Jun 15, 2023)
ee74603  Update mii/method_table.py (mrwyattii, Jun 20, 2023)
939002c  Update tests/test_non_persistent_deployment.py (mrwyattii, Jun 20, 2023)
f7b8e49  Refactored non_persistent deployment call (TosinSeg, Jun 22, 2023)
811c57c  refactoring query_handle() (TosinSeg, Jun 22, 2023)
eca4c73  Adding more descriptive error message (TosinSeg, Jun 22, 2023)
c1b8ada  Reformatting deploy parameters (TosinSeg, Jun 22, 2023)
df2a6ed  Updating deploy_non_persistent parameters (TosinSeg, Jun 22, 2023)
228e9f2  Fixing formatting (TosinSeg, Jun 22, 2023)
1b534bc  Added example to the readme (TosinSeg, Jun 22, 2023)
7f293c2  Removing deploy_non_persistent() function (TosinSeg, Jun 22, 2023)
e65eaf0  removing import statement from init (TosinSeg, Jun 22, 2023)
a881f4c  Update README.md (mrwyattii, Jun 22, 2023)
3ecdfc5  Update README.md (mrwyattii, Jun 22, 2023)
1 change: 1 addition & 0 deletions mii/__init__.py
@@ -14,6 +14,7 @@
 from .grpc_related.proto import modelresponse_pb2_grpc
 
 __version__ = "0.0.0"
+non_persistent_model = {}
 try:
     from .version import __version__
 except ImportError:
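For orientation: mii/deployment.py (below) fills this new module-level dict with one entry per non-persistent deployment, shaped as

non_persistent_model[deployment_name] = (inference_pipeline, task)

and mii_query_handle in mii/client.py unpacks that tuple to build the client.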
40 changes: 33 additions & 7 deletions mii/client.py
@@ -34,14 +34,21 @@ def mii_query_handle(deployment_name):
     Returns:
         query_handle: A query handle with a single method `.query(request_dictionary)` using which queries can be sent to the model.
     """
-    task_name, mii_configs = _get_deployment_info(deployment_name)
-    if mii_configs.enable_load_balancing:
-        return MIIClient(task_name, "localhost", mii_configs.port_number)
+    if len(mii.non_persistent_model) != 0:
+        assert deployment_name in mii.non_persistent_model, f"Could not find '{deployment_name}'"
+        inference_pipeline, task = mii.non_persistent_model[deployment_name]
+        assert task is not None, "The task name should be set before calling init"
+        return MIINonPersistentClient(task, inference_pipeline, deployment_name)
+
     else:
-        return MIITensorParallelClient(
-            task_name,
-            "localhost",
-            [mii_configs.port_number + i for i in range(mii_configs.tensor_parallel)])
+        task_name, mii_configs = _get_deployment_info(deployment_name)
+        if mii_configs.enable_load_balancing:
+            return MIIClient(task_name, "localhost", mii_configs.port_number)
+        else:
+            return MIITensorParallelClient(
+                task_name,
+                "localhost",
+                [mii_configs.port_number + i for i in range(mii_configs.tensor_parallel)])
 
 
 def create_channel(host, port):

@@ -155,6 +162,25 @@ def destroy_session(self, session_id):
         for client in self.clients:
             client.destroy_session(session_id)
 
+class MIINonPersistentClient():
+    def __init__(self, task, inference_pipeline, deployment_name):
+        self.inference_pipeline = inference_pipeline
+        self.task = task
+        self.deployment_name = deployment_name
+
+    def query(self, request_dict, **query_kwargs):
+        task_methods = GRPC_METHOD_TABLE[self.task]
+        if self.task == Tasks.QUESTION_ANSWERING:
+            return task_methods.run_inference(self.inference_pipeline, request_dict, **query_kwargs)
+
+        query = request_dict['query']
+        return task_methods.run_inference(self.inference_pipeline, query, **query_kwargs)
+
+    def terminate(self):
+        print("Terminating ...")
+        del mii.non_persistent_model[self.deployment_name]
+
+
 def terminate_restful_gateway(deployment_name):
     _, mii_configs = _get_deployment_info(deployment_name)
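With MIINonPersistentClient in place, querying a non-persistent deployment is an ordinary in-process call instead of a gRPC round trip. A minimal sketch, assuming a text-generation model already deployed under the hypothetical name "bloom560m_deployment" (any extra query kwargs are forwarded to run_inference):

import mii

generator = mii.mii_query_handle("bloom560m_deployment")
result = generator.query({"query": ["DeepSpeed is", "Seattle is"]}, max_new_tokens=30)
print(result)

# terminate() only removes the entry from mii.non_persistent_model so the
# pipeline can be garbage-collected; there is no server process to stop.
generator.terminate()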
2 changes: 1 addition & 1 deletion mii/constants.py
@@ -9,7 +9,7 @@
 class DeploymentType(enum.Enum):
     LOCAL = 1
     AML = 2
-
+    NON_PERSISTENT = 3
 
 MII_CONFIGS_KEY = 'mii_configs'
8 changes: 6 additions & 2 deletions mii/deployment.py
@@ -9,8 +9,8 @@
 
 from deepspeed.launcher.runner import fetch_hostfile
 
-from .constants import DeploymentType, MII_MODEL_PATH_DEFAULT
-from .utils import logger
+from .constants import DeploymentType, MII_MODEL_PATH_DEFAULT, MODEL_PROVIDER_MAP
+from .utils import logger, get_task, get_provider_name
 from .models.score import create_score_file
 from .config import ReplicaConfig, LoadBalancerConfig

@@ -140,6 +140,10 @@ def deploy(task,
         _deploy_aml(deployment_name=deployment_name, model_name=model, version=version)
     elif deployment_type == DeploymentType.LOCAL:
         return _deploy_local(deployment_name, model_path=model_path)
+    elif deployment_type == DeploymentType.NON_PERSISTENT:
+        assert not mii_config.enable_load_balancing, "Cannot use Load Balancing with Non persistent deployment"
+        provider = MODEL_PROVIDER_MAP[get_provider_name(model, task)]
+        mii.non_persistent_model[deployment_name] = (load_models(task, model, model_path, enable_deepspeed, enable_zero, provider, mii_config), get_task(task))
     else:
         raise Exception(f"Unknown deployment type: {deployment_type}")
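For context, this new branch is what an in-process deployment call exercises. A sketch of the usage it enables, with bigscience/bloom-560m and a hypothetical deployment name (the README example added in commit 1b534bc is the authoritative version):

import mii

mii.deploy(task="text-generation",
           model="bigscience/bloom-560m",
           deployment_name="bloom560m_deployment",
           deployment_type=mii.constants.DeploymentType.NON_PERSISTENT)

Because the loaded pipeline is stored directly in mii.non_persistent_model rather than behind a gRPC server, the deployment lives and dies with the calling process, and the assert above rejects load balancing up front.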
6 changes: 4 additions & 2 deletions mii/server.py
@@ -13,7 +13,7 @@
 from collections import defaultdict
 
 import mii
-from mii.utils import get_num_gpus, logger
+from mii.utils import get_num_gpus, logger, get_provider_name
 from mii.config import ReplicaConfig

@@ -120,14 +120,16 @@ def _build_server_args(self,
         server_args_str += " --ds-optimize" if ds_optimize else ""
 
         # XXX: fetch model provider based on model name in a more general way
-        if model_name == "gpt-neox":
+        """if model_name == "gpt-neox":
             provider = mii.constants.MODEL_PROVIDER_NAME_EA
         elif ("bigscience/bloom" == model_name) or ("microsoft/bloom" in model_name):
             provider = mii.constants.MODEL_PROVIDER_NAME_HF_LLM
         elif self.task == mii.Tasks.TEXT2IMG:
             provider = mii.constants.MODEL_PROVIDER_NAME_DIFFUSERS
         else:
             provider = mii.constants.MODEL_PROVIDER_NAME_HF
+        """
+        provider = get_provider_name(model_name, self.task)
         server_args_str += f" --provider {provider}"
 
         server_args_str += f" --config {b64_config_str}"
10 changes: 10 additions & 0 deletions mii/utils.py
@@ -208,6 +208,16 @@ def get_num_gpus(mii_configs):
     ) >= num_gpus, f"Available GPU count: {torch.cuda.device_count()} does not meet the required gpu count: {num_gpus}"
     return num_gpus
 
+def get_provider_name(model_name, task):
+    if model_name == "gpt-neox":
+        provider = mii.constants.MODEL_PROVIDER_NAME_EA
+    elif ("bigscience/bloom" == model_name) or ("microsoft/bloom" in model_name):
+        provider = mii.constants.MODEL_PROVIDER_NAME_HF_LLM
+    elif task == mii.Tasks.TEXT2IMG:
+        provider = mii.constants.MODEL_PROVIDER_NAME_DIFFUSERS
+    else:
+        provider = mii.constants.MODEL_PROVIDER_NAME_HF
+    return provider
 
 log_levels = {
     "debug": logging.DEBUG,
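For illustration, the provider name each kind of input resolves to under this logic (the model names here are only examples):

get_provider_name("gpt-neox", mii.Tasks.TEXT_GENERATION)                 # MODEL_PROVIDER_NAME_EA
get_provider_name("bigscience/bloom", mii.Tasks.TEXT_GENERATION)         # MODEL_PROVIDER_NAME_HF_LLM
get_provider_name("CompVis/stable-diffusion-v1-4", mii.Tasks.TEXT2IMG)   # MODEL_PROVIDER_NAME_DIFFUSERS
get_provider_name("gpt2", mii.Tasks.TEXT_GENERATION)                     # MODEL_PROVIDER_NAME_HF (fallback)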