From a6777bdf2b4c6ae683f3ea4cbacfdfb1e37eebb2 Mon Sep 17 00:00:00 2001
From: Seokho Son
Date: Wed, 28 Feb 2024 08:47:19 +0900
Subject: [PATCH] Apply concurrent model loading

---
 scripts/usecases/llm/llmServer.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/scripts/usecases/llm/llmServer.py b/scripts/usecases/llm/llmServer.py
index 967023375..a1ba1cc21 100644
--- a/scripts/usecases/llm/llmServer.py
+++ b/scripts/usecases/llm/llmServer.py
@@ -2,9 +2,9 @@
 from fastapi import BackgroundTasks, FastAPI, Request
 from fastapi.responses import JSONResponse
 import uvicorn
-# Correcting the import based on your initial code snippet
+from concurrent.futures import ThreadPoolExecutor
 from langchain_community.llms import VLLM
-import asyncio # Import asyncio for asynchronous task management
+
 
 app = FastAPI()
 port = 5001
@@ -15,19 +15,18 @@
 model_loaded = False
 llm = None
 
-async def load_model():
+def load_model():
     global llm, model_loaded
-    # Create and initialize the model instance
     llm = VLLM(model=model,
-               trust_remote_code=True,  # Required for loading HF models
+               trust_remote_code=True,
                max_new_tokens=50,
-               temperature=0.6
-               )
-    model_loaded = True  # Update model loading status to True
+               temperature=0.6)
+    model_loaded = True
 
 @app.on_event("startup")
-async def startup_event():
-    asyncio.create_task(load_model())  # Schedule load_model to run as a background task
+def startup_event():
+    with ThreadPoolExecutor(max_workers=1) as executor:
+        executor.submit(load_model)
 
 @app.get("/status")
 def get_status():
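
Note (added for context, not part of the patch): below is a minimal, self-contained sketch of the server flow this change sets up, using only FastAPI and concurrent.futures. The model id, the dummy load step, and the body of get_status() are placeholders, since the patch context does not show them. One design point the sketch makes explicit: when ThreadPoolExecutor is used as a context manager, leaving the with block calls shutdown(wait=True) and waits for the submitted work, so the startup hook as written in the patch returns only after load_model() finishes. Keeping the executor referenced at module level, as in the sketch, lets startup return immediately while the load continues in the worker thread.

# Hypothetical standalone sketch of llmServer.py after the patch; names below
# that do not appear in the diff (model id, the fake load step, the body of
# get_status) are placeholders, not the project's actual code.
import time
from concurrent.futures import ThreadPoolExecutor

import uvicorn
from fastapi import FastAPI

app = FastAPI()
port = 5001

model = "some-hf-model-id"   # placeholder; the real value is set elsewhere in llmServer.py
model_loaded = False
llm = None

# Module-level executor: it is not shut down during startup, so submitting
# work to it does not block the startup hook.
executor = ThreadPoolExecutor(max_workers=1)

def load_model():
    # Blocking load that runs in the worker thread.
    global llm, model_loaded
    time.sleep(2)        # stand-in for the expensive VLLM(model=model, ...) construction
    llm = object()       # stand-in for the loaded model instance
    model_loaded = True

@app.on_event("startup")
def startup_event():
    executor.submit(load_model)

@app.get("/status")
def get_status():
    # Placeholder response shape; the real handler's body is not shown in the hunk.
    return {"model_loaded": model_loaded}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=port)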