From d6fbdd5a5987a3d01b80c3703610fd0a5249b1fc Mon Sep 17 00:00:00 2001
From: Andrew Sy Kim
Date: Tue, 10 Sep 2024 18:03:50 -0400
Subject: [PATCH] add support for pipeline-parallel-size in vLLM example
 (#2370)

Signed-off-by: Andrew Sy Kim
---
 ray-operator/config/samples/vllm/ray-service.vllm.yaml | 1 +
 ray-operator/config/samples/vllm/serve.py              | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ray-operator/config/samples/vllm/ray-service.vllm.yaml b/ray-operator/config/samples/vllm/ray-service.vllm.yaml
index 233ee2211b..1efdfba85d 100644
--- a/ray-operator/config/samples/vllm/ray-service.vllm.yaml
+++ b/ray-operator/config/samples/vllm/ray-service.vllm.yaml
@@ -20,6 +20,7 @@ spec:
         env_vars:
           MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
           TENSOR_PARALLELISM: "2"
+          PIPELINE_PARALLELISM: "1"
   rayClusterConfig:
     headGroupSpec:
       rayStartParams:
diff --git a/ray-operator/config/samples/vllm/serve.py b/ray-operator/config/samples/vllm/serve.py
index 3df203daf6..353cff255b 100644
--- a/ray-operator/config/samples/vllm/serve.py
+++ b/ray-operator/config/samples/vllm/serve.py
@@ -122,4 +122,4 @@ def build_app(cli_args: Dict[str, str]) -> serve.Application:
 
 
 model = build_app(
-    {"model": os.environ['MODEL_ID'], "tensor-parallel-size": os.environ['TENSOR_PARALLELISM']})
+    {"model": os.environ['MODEL_ID'], "tensor-parallel-size": os.environ['TENSOR_PARALLELISM'], "pipeline-parallel-size": os.environ['PIPELINE_PARALLELISM']})