Skip to content

Commit

Permalink
[release-0.12] Update fastapi to 0.109.1 and Support ray 2.10 (kserve…
Browse files Browse the repository at this point in the history
…#3609)

* CVE-2024-24762 - update fastapi to 0.109.1

chore:	Fix [CVE-2024-24762](https://www.cve.org/CVERecord?id=CVE-2024-24762) - fastapi Regular Expression Denial of Service (ReDoS)
	Plus, update Ray to 2.10 to allow updating fastapi. On previous versions of Ray
	the fastapi version was pinned, which was preventing the fastapi version update.

Use the new handle API:

From Ray Serve docs:
Ray 2.7 introduces a new `DeploymentHandle` API (`ray.serve.handle.DeploymentHandle`) that will replace the existing `RayServeHandle` and `RayServeSyncHandle` APIs.

Signed-off-by: Spolti <[email protected]>
Signed-off-by: Sivanantham Chinnaiyan <[email protected]>

* add link to about the RayServeHandle deprecation

Signed-off-by: Spolti <[email protected]>
Signed-off-by: Sivanantham Chinnaiyan <[email protected]>

* restrict pydantic version to >1,<3

Signed-off-by: Sivanantham Chinnaiyan <[email protected]>

---------

Signed-off-by: Spolti <[email protected]>
Signed-off-by: Sivanantham Chinnaiyan <[email protected]>
Co-authored-by: Spolti <[email protected]>
  • Loading branch information
sivanantha321 and spolti authored Apr 17, 2024
1 parent c9570d6 commit ac9bf1a
Show file tree
Hide file tree
Showing 19 changed files with 14,794 additions and 14,075 deletions.
1,812 changes: 927 additions & 885 deletions python/aiffairness/poetry.lock

Large diffs are not rendered by default.

2,481 changes: 1,221 additions & 1,260 deletions python/alibiexplainer/poetry.lock

Large diffs are not rendered by default.

2,314 changes: 1,278 additions & 1,036 deletions python/artexplainer/poetry.lock

Large diffs are not rendered by default.

1,965 changes: 1,006 additions & 959 deletions python/custom_model/poetry.lock

Large diffs are not rendered by default.

1,512 changes: 771 additions & 741 deletions python/custom_tokenizer/poetry.lock

Large diffs are not rendered by default.

1,977 changes: 1,012 additions & 965 deletions python/custom_transformer/poetry.lock

Large diffs are not rendered by default.

1,380 changes: 699 additions & 681 deletions python/huggingfaceserver/poetry.lock

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions python/kserve/kserve/model_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from typing import Dict, Optional, Union
from .model import Model
from ray.serve.handle import RayServeHandle
from ray.serve.handle import DeploymentHandle
import os

MODEL_MOUNT_DIRS = "/mnt/models"
Expand All @@ -30,7 +30,7 @@ class ModelRepository:
"""

def __init__(self, models_dir: str = MODEL_MOUNT_DIRS):
self.models: Dict[str, Union[Model, RayServeHandle]] = {}
self.models: Dict[str, Union[Model, DeploymentHandle]] = {}
self.models_dir = models_dir

def load_models(self):
Expand All @@ -42,10 +42,10 @@ def load_models(self):
def set_models_dir(self, models_dir): # used for unit tests
self.models_dir = models_dir

def get_model(self, name: str) -> Optional[Union[Model, RayServeHandle]]:
def get_model(self, name: str) -> Optional[Union[Model, DeploymentHandle]]:
return self.models.get(name, None)

def get_models(self) -> Dict[str, Union[Model, RayServeHandle]]:
def get_models(self) -> Dict[str, Union[Model, DeploymentHandle]]:
return self.models

def is_model_ready(self, name: str):
Expand All @@ -61,7 +61,7 @@ def is_model_ready(self, name: str):
def update(self, model: Model):
self.models[model.name] = model

def update_handle(self, name: str, model_handle: RayServeHandle):
def update_handle(self, name: str, model_handle: DeploymentHandle):
self.models[name] = model_handle

def load(self, name: str) -> bool:
Expand Down
28 changes: 20 additions & 8 deletions python/kserve/kserve/model_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

from ray import serve as rayserve
from ray.serve.api import Deployment
from ray.serve.handle import RayServeHandle
from ray.serve.handle import DeploymentHandle

from .logging import KSERVE_LOG_CONFIG, logger
from .model import Model
Expand All @@ -48,8 +48,8 @@
help="The number of uvicorn workers for multi-processing.")
parser.add_argument("--max_threads", default=4, type=int,
help="The max number of gRPC processing threads.")
parser.add_argument('--max_asyncio_workers', default=None, type=int,
help='The max number of asyncio workers to spawn.')
parser.add_argument("--max_asyncio_workers", default=None, type=int,
help="The max number of asyncio workers to spawn.")
parser.add_argument("--enable_grpc", default=True, type=lambda x: utils.strtobool(x),
help="Enable gRPC for the model server.")
parser.add_argument("--enable_docs_url", default=False, type=lambda x: utils.strtobool(x),
Expand Down Expand Up @@ -227,6 +227,9 @@ async def servers_task():

async def stop(self, sig: Optional[int] = None):
""" Stop the instances of REST and gRPC model servers.
Args:
sig: The signal to stop the server. Default: ``None``.
"""
logger.info("Stopping the model server")
if self._rest_server:
Expand All @@ -237,8 +240,7 @@ async def stop(self, sig: Optional[int] = None):
await self._grpc_server.stop(sig)

def register_exception_handler(self, handler: Callable[[asyncio.events.AbstractEventLoop, Dict[str, Any]], None]):
"""
Add a custom handler as the event loop exception handler.
"""Add a custom handler as the event loop exception handler.
If a handler is not provided, the default exception handler will be set.
Expand All @@ -249,8 +251,7 @@ def register_exception_handler(self, handler: Callable[[asyncio.events.AbstractE
self._custom_exception_handler = handler

def default_exception_handler(self, loop: asyncio.events.AbstractEventLoop, context: Dict[str, Any]):
"""
Default exception handler for event loop.
"""Default exception handler for event loop.
This is called when an exception occurs and no exception handler is set.
By default, this will shut down the server gracefully.
Expand All @@ -261,11 +262,22 @@ def default_exception_handler(self, loop: asyncio.events.AbstractEventLoop, cont
loop.run_until_complete(self.stop())
loop.default_exception_handler(context)

def register_model_handle(self, name: str, model_handle: RayServeHandle):
def register_model_handle(self, name: str, model_handle: DeploymentHandle):
"""Register a model handle to the model server.
Args:
name: The name of the model handle.
model_handle: The model handle object.
"""
self.registered_models.update_handle(name, model_handle)
logger.info("Registering model handle: %s", name)

def register_model(self, model: Model):
"""Register a model to the model server.
Args:
model: The model object.
"""
if not model.name:
raise Exception(
"Failed to register model, model.name must be provided.")
Expand Down
21 changes: 8 additions & 13 deletions python/kserve/kserve/protocol/dataplane.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

import cloudevents.exceptions as ce
import orjson
import ray

from cloudevents.http import CloudEvent, from_http
from cloudevents.sdk.converters.util import has_binary_headers
from ray.serve.handle import RayServeHandle, RayServeSyncHandle, DeploymentHandle
from ray.serve.handle import DeploymentHandle

from .rest.v2_datamodels import GenerateRequest, GenerateResponse
from ..model import Model
Expand All @@ -39,7 +39,9 @@
# RayServeSyncHandle has been the return type of serve.run since Ray 2.5.
# DeploymentHandle will be the new return type (still under feature flag in Ray 2.7).
# ref https://github.com/ray-project/ray/pull/37817
ModelHandleType = Union[Model, RayServeHandle, RayServeSyncHandle, DeploymentHandle]
# On Ray 2.10, it now returns DeploymentHandle:
# https://docs.ray.io/en/latest/serve/api/index.html#deployment-handles
ModelHandleType = Union[Model, DeploymentHandle]


class DataPlane:
Expand Down Expand Up @@ -170,10 +172,7 @@ async def model_metadata(self, model_name: str) -> Dict:
# TODO: model versioning is not supported yet
model = self.get_model_from_registry(model_name)

if isinstance(model, RayServeSyncHandle):
input_types = ray.get(model.get_input_types.remote())
output_types = ray.get(model.get_output_types.remote())
elif isinstance(model, (RayServeHandle, DeploymentHandle)):
if isinstance(model, DeploymentHandle):
input_types = await model.get_input_types.remote()
output_types = await model.get_output_types.remote()
else:
Expand Down Expand Up @@ -316,9 +315,7 @@ async def infer(
"""
# call model locally or remote model workers
model = self.get_model(model_name)
if isinstance(model, RayServeSyncHandle):
response = ray.get(model.remote(request, headers=headers))
elif isinstance(model, (RayServeHandle, DeploymentHandle)):
if isinstance(model, DeploymentHandle):
response = await model.remote(request, headers=headers)
else:
response = await model(request, headers=headers)
Expand Down Expand Up @@ -367,9 +364,7 @@ async def explain(self, model_name: str,
"""
# call model locally or remote model workers
model = self.get_model(model_name)
if isinstance(model, RayServeSyncHandle):
response = ray.get(model.remote(request, verb=InferenceVerb.EXPLAIN))
elif isinstance(model, (RayServeHandle, DeploymentHandle)):
if isinstance(model, DeploymentHandle):
response = await model.remote(request, verb=InferenceVerb.EXPLAIN)
else:
response = await model(request, verb=InferenceVerb.EXPLAIN)
Expand Down
Loading

0 comments on commit ac9bf1a

Please sign in to comment.