[Misc][Doc] Add Example of using OpenAI Server with VLM #5832

Merged 2 commits on Jun 26, 2024
2 changes: 2 additions & 0 deletions docs/source/models/vlm.rst
@@ -130,6 +130,8 @@ To consume the server, you can use the OpenAI client like in the example below:
     )
     print("Chat response:", chat_response)
 
+A full code example can be found in `examples/openai_vision_api_client.py <https://github.com/vllm-project/vllm/blob/main/examples/openai_vision_api_client.py>`_.
+
.. note::

By default, the timeout for fetching images over HTTP URLs is ``5`` seconds. You can override this by setting the environment variable:
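A minimal sketch of such an override, assuming the variable is ``VLLM_IMAGE_FETCH_TIMEOUT`` (the name defined in ``vllm/envs.py``) and that it must be set in the server process before launch:

    import os

    # Assumption: vLLM reads VLLM_IMAGE_FETCH_TIMEOUT (in seconds) from the
    # server process environment; set it before starting the API server.
    os.environ["VLLM_IMAGE_FETCH_TIMEOUT"] = "30"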
90 changes: 90 additions & 0 deletions examples/openai_vision_api_client.py
@@ -0,0 +1,90 @@
"""An example showing how to use vLLM to serve VLMs.

Launch the vLLM server with the following command:
python -m vllm.entrypoints.openai.api_server \
--model llava-hf/llava-1.5-7b-hf \
--image-input-type pixel_values \
--image-token-id 32000 \
--image-input-shape 1,3,336,336 \
--image-feature-size 576 \
--chat-template template_llava.jinja
"""
import base64

import requests
from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
# defaults to os.environ.get("OPENAI_API_KEY")
api_key=openai_api_key,
base_url=openai_api_base,
)

models = client.models.list()
model = models.data[0].id

image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"

# Use image url in the payload
chat_completion_from_url = client.chat.completions.create(
messages=[{
"role":
"user",
"content": [
{
"type": "text",
"text": "What’s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": image_url
},
},
],
}],
model=model,
)

result = chat_completion_from_url.choices[0].message.content
print(f"Chat completion output:{result}")


# Use base64 encoded image in the payload
def encode_image_base64_from_url(image_url: str) -> str:
"""Encode an image retrieved from a remote url to base64 format."""

with requests.get(image_url) as response:
response.raise_for_status()
result = base64.b64encode(response.content).decode('utf-8')

return result


image_base64 = encode_image_base64_from_url(image_url=image_url)
chat_completion_from_base64 = client.chat.completions.create(
messages=[{
"role":
"user",
"content": [
{
"type": "text",
"text": "What’s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{image_base64}"
},
},
],
}],
model=model,
)

result = chat_completion_from_base64.choices[0].message.content
print(f"Chat completion output:{result}")
12 changes: 9 additions & 3 deletions vllm/multimodal/utils.py
@@ -1,6 +1,7 @@
 import base64
 from io import BytesIO
 from typing import Optional, Union
+from urllib.parse import urlparse
 
 import aiohttp
 from PIL import Image
@@ -28,6 +29,10 @@ async def fetch_image(cls, image_url: str) -> Image.Image:
"""Load PIL image from a url or base64 encoded openai GPT4V format"""

if image_url.startswith('http'):
parsed_url = urlparse(image_url)
if parsed_url.scheme not in ["http", "https"]:
raise ValueError("Invalid 'image_url': A valid 'image_url' "
"must have scheme 'http' or 'https'.")
# Avoid circular import
from vllm import __version__ as VLLM_VERSION

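A standalone sketch of why the added check matters (illustration only, not vLLM code): ``startswith('http')`` alone also matches malformed schemes, which ``urlparse`` rejects:

    from urllib.parse import urlparse

    def has_valid_scheme(image_url: str) -> bool:
        """Accept only genuine http/https URLs, mirroring the check above."""
        return urlparse(image_url).scheme in ["http", "https"]

    assert has_valid_scheme("https://example.com/cat.jpg")
    # Starts with 'http', but the scheme is 'httpz', so it is rejected.
    assert not has_valid_scheme("httpz://example.com/cat.jpg")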
@@ -44,8 +49,9 @@ async def fetch_image(cls, image_url: str) -> Image.Image:
             image = load_image_from_base64(image_url.split(',', 1)[1])
 
         else:
-            raise ValueError("Invalid image url: A valid image url must start "
-                             "with either 'data:image' or 'http'.")
+            raise ValueError(
+                "Invalid 'image_url': A valid 'image_url' must start "
+                "with either 'data:image' or 'http'.")
 
         return image

@@ -56,7 +62,7 @@ async def async_get_and_parse_image(image_url: str) -> ImagePixelData:


 def encode_image_base64(image: Image.Image, format: str = 'JPEG') -> str:
-    """encode image to base64 format."""
+    """Encode a pillow image to base64 format."""
 
     buffered = BytesIO()
     if format == 'JPEG':
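
A quick usage sketch for this helper, with a synthetic image standing in for a real photo (``encode_image_base64`` is a module-level function in ``vllm.multimodal.utils`` as of this diff):

    from PIL import Image
    from vllm.multimodal.utils import encode_image_base64

    # Synthetic 8x8 red square instead of a real image.
    image = Image.new("RGB", (8, 8), color=(255, 0, 0))
    image_base64 = encode_image_base64(image)
    data_url = f"data:image/jpeg;base64,{image_base64}"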