vllm-project · ywang96 · Jun 18, 2024 · May 22, 2024 · May 23, 2024 · May 23, 2024
diff --git a/examples/phi3v_example.py b/examples/phi3v_example.py
@@ -0,0 +1,45 @@
+import os
+
+from PIL import Image
+import requests
+from transformers import AutoProcessor
+
+from vllm import LLM
+from vllm.sequence import MultiModalData
+
+
+# os.environ["VLLM_CPU_KVCACHE_SPACE"] = "10"
+
+def run_phi3v(model_path: str = "/data/LLM-model/Phi-3-vision-128k-instruct"):
+    """Describe one downloaded image with Phi-3-vision and print the model's answer.
+
+    ``model_path`` is handed to both ``AutoProcessor.from_pretrained`` and ``LLM``.
+    """
+    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+    llm = LLM(
+        model=model_path,
+        trust_remote_code=True,
+        max_model_len=8192,
+        image_input_type="pixel_values",
+        image_token_id=-1,
+        image_input_shape="1008, 1344",
+        image_feature_size=1024,
+    )
+
+    # Bound the download and fail on an HTTP error before PIL sees the body.
+    url = "https://www.ilankelman.org/stopsigns/australia.jpg"
+    response = requests.get(url, stream=True, timeout=30)
+    response.raise_for_status()
+    image = Image.open(response.raw)
+
+    # Single-image prompt: a user turn holding the <|image_1|> placeholder, then the assistant tag.
+    prompt = "<|user|>\n<|image_1|>\nWhat is shown in this image?<|end|>\n<|assistant|>\n"
+    inputs = processor(prompt, image, return_tensors="pt")
+    multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=inputs["pixel_values"])
+    outputs = llm.generate(prompt_token_ids=inputs["input_ids"].tolist(), multi_modal_data=multi_modal_data)
+    for output in outputs:
+        print(output.outputs[0].text)
+
+
+if __name__ == "__main__":  # run the demo only when this file is executed as a script
+    run_phi3v()
diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
@@ -47,6 +47,7 @@
     "OrionForCausalLM": ("orion", "OrionForCausalLM"),
     "PhiForCausalLM": ("phi", "PhiForCausalLM"),
     "Phi3ForCausalLM": ("llama", "LlamaForCausalLM"),
+    "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "QWenLMHeadModel": ("qwen", "QWenLMHeadModel"),
     "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
     "Qwen2MoeForCausalLM": ("qwen2_moe", "Qwen2MoeForCausalLM"),