perf(import): lazy load vLLM (#48)
Signed-off-by: Aaron Pham <[email protected]>
aarnphm authored Jan 8, 2025
1 parent 53e9e9f commit 7c63590
Showing 1 changed file with 15 additions and 15 deletions.
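
The change defers the vllm.entrypoints.openai.api_server import, and the OpenAI route registration that depends on it, from module import time to VLLM.__init__, so merely importing service.py no longer loads vLLM. A minimal sketch of the pattern before the actual diff below, using a stand-in module and illustrative names rather than the real vLLM entrypoints:

import fastapi

app = fastapi.FastAPI()


class Service:
    def __init__(self) -> None:
        # Lazy import: the heavy dependency is loaded only when the service is
        # constructed, not when this module is imported. `json` stands in for a
        # slow import such as vllm.
        import json as heavy_module

        # Anything that needs the heavy module -- here, registering a route --
        # moves into __init__ next to the import.
        def models() -> dict:
            return {"backend": heavy_module.__name__}

        app.add_api_route("/models", models, methods=["GET"])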
src/vllm-chat/service.py: 15 additions, 15 deletions

@@ -11,7 +11,6 @@
 import fastapi.staticfiles
 import PIL.Image
 import pydantic
-import vllm.entrypoints.openai.api_server as vllm_api_server
 import yaml
 from fastapi.responses import FileResponse
 
@@ -43,18 +42,6 @@ class Message(pydantic.BaseModel):
 
 # openai api app
 openai_api_app = fastapi.FastAPI()
-OPENAI_ENDPOINTS = [
-    ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]],
-    ["/completions", vllm_api_server.create_completion, ["POST"]],
-    ["/models", vllm_api_server.show_available_models, ["GET"]],
-]
-for route, endpoint, methods in OPENAI_ENDPOINTS:
-    openai_api_app.add_api_route(
-        path=route,
-        endpoint=endpoint,
-        methods=methods,
-        include_in_schema=True,
-    )
 
 # chat UI app
 ui_app = fastapi.FastAPI()
@@ -87,7 +74,20 @@ class VLLM:
 
     def __init__(self) -> None:
         from vllm import AsyncEngineArgs, AsyncLLMEngine
-        from vllm.entrypoints.openai.api_server import init_app_state
+        import vllm.entrypoints.openai.api_server as vllm_api_server
 
+        OPENAI_ENDPOINTS = [
+            ["/chat/completions", vllm_api_server.create_chat_completion, ["POST"]],
+            ["/completions", vllm_api_server.create_completion, ["POST"]],
+            ["/models", vllm_api_server.show_available_models, ["GET"]],
+        ]
+        for route, endpoint, methods in OPENAI_ENDPOINTS:
+            openai_api_app.add_api_route(
+                path=route,
+                endpoint=endpoint,
+                methods=methods,
+                include_in_schema=True,
+            )
+
 
         ENGINE_ARGS = AsyncEngineArgs(**dict(ENGINE_CONFIG, model=self.model))
@@ -115,7 +115,7 @@ def __init__(self) -> None:
         args.enable_auto_tool_choice = False
         args.tool_call_parser = None
 
-        init_app_state(self.engine, model_config, openai_api_app.state, args)
+        vllm_api_server.init_app_state(self.engine, model_config, openai_api_app.state, args)
 
     @bentoml.api
     async def generate(self, prompt: str = "what is this?") -> AsyncGenerator[str, None]:
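
One way to sanity-check the effect, assuming the project's dependencies are installed and the snippet is run from src/vllm-chat/ so that service.py is importable (the module path comes from the diff; the harness itself is illustrative, not project tooling):

import sys
import time

start = time.perf_counter()
import service  # noqa: E402,F401  -- src/vllm-chat/service.py
elapsed = time.perf_counter() - start

print(f"import service: {elapsed:.2f}s")
# Before this commit, vllm was imported at module level, so the line below
# printed True right after import; after it, vllm should only show up in
# sys.modules once VLLM() is actually constructed.
print("vllm in sys.modules:", "vllm" in sys.modules)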
