Apply concurrent model loading
seokho-son authored Feb 27, 2024
1 parent 9b140ed commit a6777bd
Showing 1 changed file with 9 additions and 10 deletions.
19 changes: 9 additions & 10 deletions scripts/usecases/llm/llmServer.py
@@ -2,9 +2,9 @@
 from fastapi import BackgroundTasks, FastAPI, Request
 from fastapi.responses import JSONResponse
 import uvicorn
-# Correcting the import based on your initial code snippet
+from concurrent.futures import ThreadPoolExecutor
 from langchain_community.llms import VLLM
-import asyncio # Import asyncio for asynchronous task management
+

 app = FastAPI()
 port = 5001
@@ -15,19 +15,18 @@
 model_loaded = False
 llm = None

-async def load_model():
+def load_model():
     global llm, model_loaded
-    # Create and initialize the model instance
     llm = VLLM(model=model,
-               trust_remote_code=True, # Required for loading HF models
+               trust_remote_code=True,
                max_new_tokens=50,
-               temperature=0.6
-    )
-    model_loaded = True # Update model loading status to True
+               temperature=0.6)
+    model_loaded = True

 @app.on_event("startup")
-async def startup_event():
-    asyncio.create_task(load_model()) # Schedule load_model to run as a background task
+def startup_event():
+    with ThreadPoolExecutor(max_workers=1) as executor:
+        executor.submit(load_model)

 @app.get("/status")
 def get_status():
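For reference, the pattern this commit applies can be reduced to a minimal, self-contained sketch. The names fake_load_model and its 10-second sleep are hypothetical stand-ins for the real VLLM(...) construction, so the snippet runs with only fastapi and uvicorn installed; it is an illustration of the background-loading idea, not the repository's actual server.

import time
from concurrent.futures import ThreadPoolExecutor

import uvicorn
from fastapi import FastAPI

app = FastAPI()
port = 5001

# Executor kept at module scope: submit() returns immediately and the worker
# thread keeps running after the startup hook has finished.
executor = ThreadPoolExecutor(max_workers=1)

model_loaded = False
llm = None

def fake_load_model():
    # Stand-in for the slow VLLM(model=..., trust_remote_code=True, ...) call.
    global llm, model_loaded
    time.sleep(10)
    llm = object()  # placeholder model handle
    model_loaded = True

@app.on_event("startup")
def startup_event():
    # Schedule the load concurrently; the server starts serving right away.
    executor.submit(fake_load_model)

@app.get("/status")
def get_status():
    # Reports false until the background load flips the flag.
    return {"model_loaded": model_loaded}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=port)

One design note on the sketch: leaving a ThreadPoolExecutor context manager calls shutdown(wait=True) and therefore waits for submitted work to finish, so the sketch keeps the executor open at module scope; that is what lets /status answer with model_loaded: false while the model is still loading.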
