[Misc] Add vLLM version getter to utils #5098

Merged: 5 commits, Jun 13, 2024

2 changes: 1 addition & 1 deletion setup.py

@@ -314,7 +314,7 @@ def find_version(filepath: str) -> str:


 def get_vllm_version() -> str:
-    version = find_version(get_path("vllm", "__init__.py"))
+    version = find_version(get_path("vllm", "version.py"))

     if _is_cuda():
         cuda_version = str(get_nvcc_cuda_version())

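For context, find_version reads the version string out of a file with a regex instead of importing the module, which lets setup.py determine the version before vLLM's dependencies are installed. A minimal sketch of how such helpers typically look (the real definitions live in vLLM's setup.py; treat this as an assumption, not the exact code):

import os
import re


def get_path(*filepath) -> str:
    # Resolve a path relative to the directory containing setup.py.
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), *filepath)


def find_version(filepath: str) -> str:
    # Scan the file for a line like: __version__ = "0.5.0"
    # and return the quoted value, without importing the module.
    with open(filepath) as fp:
        match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                          fp.read(), re.MULTILINE)
    if match:
        return match.group(1)
    raise RuntimeError(f"Unable to find version string in {filepath}.")
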
3 changes: 2 additions & 1 deletion vllm/__init__.py

@@ -12,9 +12,10 @@
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams

-__version__ = "0.5.0"
+from .version import __version__

 __all__ = [
+    "__version__",
     "LLM",
     "ModelRegistry",
     "PromptStrictInputs",

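Because __version__ is re-exported from the package root and listed in __all__, existing call sites keep working. A quick check, assuming a vLLM build that includes this change:

import vllm
from vllm.version import __version__

# Both access paths resolve to the same string defined in vllm/version.py.
assert vllm.__version__ == __version__
print(vllm.__version__)  # e.g. "0.5.0"
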
4 changes: 2 additions & 2 deletions vllm/engine/llm_engine.py

@@ -6,7 +6,6 @@

 from transformers import GenerationConfig, PreTrainedTokenizer

-import vllm
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig, LoadConfig,
                          LoRAConfig, ModelConfig, ParallelConfig,
                          SchedulerConfig, SpeculativeConfig,
@@ -38,6 +37,7 @@
 from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                   usage_message)
 from vllm.utils import Counter
+from vllm.version import __version__ as VLLM_VERSION

 logger = init_logger(__name__)
 _LOCAL_LOGGING_INTERVAL_SEC = 5
@@ -169,7 +169,7 @@ def __init__(
             "enforce_eager=%s, kv_cache_dtype=%s, "
             "quantization_param_path=%s, device_config=%s, "
             "decoding_config=%r, seed=%d, served_model_name=%s)",
-            vllm.__version__,
+            VLLM_VERSION,
             model_config.model,
             speculative_config,
             model_config.tokenizer,

6 changes: 3 additions & 3 deletions vllm/entrypoints/openai/api_server.py

@@ -15,7 +15,6 @@

 from prometheus_client import make_asgi_app
 from starlette.routing import Mount

-import vllm
 import vllm.envs as envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -29,6 +28,7 @@
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
+from vllm.version import __version__ as VLLM_VERSION

 TIMEOUT_KEEP_ALIVE = 5  # seconds

@@ -93,7 +93,7 @@ async def show_available_models():

 @app.get("/version")
 async def show_version():
-    ver = {"version": vllm.__version__}
+    ver = {"version": VLLM_VERSION}
     return JSONResponse(content=ver)

@@ -174,7 +174,7 @@ async def authentication(request: Request, call_next):
             raise ValueError(f"Invalid middleware {middleware}. "
                              f"Must be a function or a class.")

-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)

     if args.served_model_name is not None:

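The /version endpoint exposes the same string over HTTP. A quick way to exercise it against a locally running server (assuming the default host and port):

import json
import urllib.request

# Query the OpenAI-compatible server's version endpoint.
with urllib.request.urlopen("http://localhost:8000/version") as resp:
    print(json.load(resp))  # e.g. {"version": "0.5.0"}
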
4 changes: 2 additions & 2 deletions vllm/entrypoints/openai/run_batch.py

@@ -5,7 +5,6 @@

 import aiohttp

-import vllm
 from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
@@ -15,6 +14,7 @@
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import random_uuid
+from vllm.version import __version__ as VLLM_VERSION

 logger = init_logger(__name__)

@@ -135,7 +135,7 @@ async def main(args):
 if __name__ == "__main__":
     args = parse_args()

-    logger.info("vLLM API server version %s", vllm.__version__)
+    logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)

     asyncio.run(main(args))

Review thread on the added import (from vllm.version import __version__ as VLLM_VERSION):

Collaborator: Is it necessary to import it as VLLM_VERSION?

@DarkLight1337 (Member, Author), Jun 12, 2024: Since __version__ also exists for other libraries, I think it is clearer to use VLLM_VERSION to indicate that the variable is referring to the version of vLLM specifically.

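A small illustration of the ambiguity the alias avoids (torch is used here only as an example of another library that exposes __version__):

from torch import __version__ as TORCH_VERSION
from vllm.version import __version__ as VLLM_VERSION

# With two bare `from ... import __version__` lines, the second would silently
# shadow the first; the aliases keep both version strings distinct and readable.
print(f"torch {TORCH_VERSION}, vLLM {VLLM_VERSION}")
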
4 changes: 2 additions & 2 deletions vllm/usage/usage_lib.py

@@ -16,6 +16,7 @@

 import torch

 import vllm.envs as envs
+from vllm.version import __version__ as VLLM_VERSION

 _config_home = envs.VLLM_CONFIG_ROOT
 _USAGE_STATS_JSON_PATH = os.path.join(_config_home, "vllm/usage_stats.json")
@@ -163,9 +164,8 @@ def _report_usage_once(self, model_architecture: str,
         ])

         # vLLM information
-        import vllm  # delayed import to prevent circular import
         self.context = usage_context.value
-        self.vllm_version = vllm.__version__
+        self.vllm_version = VLLM_VERSION
         self.model_architecture = model_architecture

         # Metadata

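This hunk also removes the delayed-import workaround: vllm/version.py imports nothing, so it is a leaf module that any module inside the package can import at the top level without re-entering vllm/__init__.py. A minimal sketch of the pattern, with hypothetical module names:

# pkg/version.py -- leaf module: defines data only, imports nothing.
__version__ = "0.5.0"

# pkg/usage.py
# `import pkg` at the top level here would re-enter pkg/__init__.py while it
# is still initializing (since __init__ imports this module), leaving a
# partially initialized package. Importing the leaf directly avoids the cycle:
from pkg.version import __version__ as PKG_VERSION

# pkg/__init__.py
from pkg.usage import PKG_VERSION  # fine: usage.py never imports pkg itself
from pkg.version import __version__
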
1 change: 1 addition & 0 deletions vllm/version.py

@@ -0,0 +1 @@
+__version__ = "0.5.0"