From bd385be94f5b141c2e5fb852f56d6fadf0a82b5b Mon Sep 17 00:00:00 2001 From: Sihan Chen <39623753+Spycsh@users.noreply.github.com> Date: Wed, 19 Jun 2024 17:49:08 +0800 Subject: [PATCH] Add LVMs LLaVA component (#181) * add img2txt microservice Signed-off-by: Spycsh * fix * rem * fix * fix * debug * add debug * add standalone llava server to avoid tokenization in diff process issue * add another test * endpoint * debug * rem * fix * disable hpu graph * dockernize * add test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rem nohup * fix * refactor img2txt to lvms * Update test_reranks_langchain.sh * fix test name * rename * lower wait time --------- Signed-off-by: Spycsh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: chen, suyue --- comps/__init__.py | 1 + comps/cores/mega/constants.py | 1 + comps/cores/proto/docarray.py | 8 +- comps/embeddings/README.md | 2 +- comps/lvms/Dockerfile | 18 +++ comps/lvms/README.md | 111 ++++++++++++++++++ comps/lvms/__init__.py | 2 + comps/lvms/check_lvm.py | 22 ++++ comps/lvms/llava/Dockerfile | 19 +++ comps/lvms/llava/Dockerfile_hpu | 23 ++++ comps/lvms/llava/__init__.py | 2 + comps/lvms/llava/check_llava_server.py | 22 ++++ comps/lvms/llava/llava_server.py | 153 +++++++++++++++++++++++++ comps/lvms/lvm.py | 51 +++++++++ comps/lvms/requirements.txt | 10 ++ tests/test_lvms_llava.sh | 54 +++++++++ tests/test_reranks_langchain.sh | 2 +- 17 files changed, 498 insertions(+), 3 deletions(-) create mode 100644 comps/lvms/Dockerfile create mode 100644 comps/lvms/README.md create mode 100644 comps/lvms/__init__.py create mode 100644 comps/lvms/check_lvm.py create mode 100644 comps/lvms/llava/Dockerfile create mode 100644 comps/lvms/llava/Dockerfile_hpu create mode 100644 comps/lvms/llava/__init__.py create mode 100644 comps/lvms/llava/check_llava_server.py create mode 100644 comps/lvms/llava/llava_server.py create mode 100644 comps/lvms/lvm.py create mode 100644 comps/lvms/requirements.txt create mode 100644 tests/test_lvms_llava.sh diff --git a/comps/__init__.py b/comps/__init__.py index 6b63820d8..14ff81d15 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -16,6 +16,7 @@ TextDoc, RAGASParams, RAGASScores, + LVMDoc, ) # Constants diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index 35d36f215..6de0d0edc 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -26,6 +26,7 @@ class ServiceType(Enum): DATAPREP = 9 UNDEFINED = 10 RAGAS = 11 + LVM = 12 class MegaServiceEndpoint(Enum): diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index d785c2445..23c38890e 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -7,7 +7,7 @@ from docarray import BaseDoc, DocList from docarray.documents import AudioDoc from docarray.typing import AudioUrl -from pydantic import Field, conlist +from pydantic import Field, conint, conlist class TextDoc(BaseDoc): @@ -102,3 +102,9 @@ class RAGASScores(BaseDoc): faithfulness: float context_recallL: float context_precision: float + + +class LVMDoc(BaseDoc): + image: str + prompt: str + max_new_tokens: conint(ge=0, le=1024) = 512 diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md index 169b9831c..4e1249eb7 100644 --- a/comps/embeddings/README.md +++ b/comps/embeddings/README.md @@ -155,6 +155,6 @@ curl http://localhost:6000/v1/health_check\ ```bash curl http://localhost:6000/v1/embeddings\ -X POST \ - 
-d '{"text":"Hello, world!"}' \ + -d '{"input":"Hello, world!"}' \ -H 'Content-Type: application/json' ``` diff --git a/comps/lvms/Dockerfile b/comps/lvms/Dockerfile new file mode 100644 index 000000000..73be60ba6 --- /dev/null +++ b/comps/lvms/Dockerfile @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/lvms + +ENTRYPOINT ["python", "lvm.py"] \ No newline at end of file diff --git a/comps/lvms/README.md b/comps/lvms/README.md new file mode 100644 index 000000000..67854885f --- /dev/null +++ b/comps/lvms/README.md @@ -0,0 +1,111 @@ +# LVM Microservice + +Visual Question and Answering is one of the multimodal tasks empowered by LVMs (Large Visual Models). This microservice supports visual Q&A by using LLaVA as the base large visual model. It accepts two inputs: a prompt and an image. It outputs the answer to the prompt about the image. + +# 🚀1. Start Microservice with Python (Option 1) + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +## 1.2 Start LLaVA Service/Test + +- Xeon CPU + +```bash +# Start LLaVA service +cd llava/ +nohup python llava_server.py --device=cpu & +# Wait until the server is up +# Test +python check_llava_server.py +``` + +- Gaudi2 HPU + +```bash +pip install optimum[habana] +``` + +```bash +cd llava/ +# Start LLaVA service +nohup python llava_server.py & +# Test +python check_llava_server.py +``` + +## 1.3 Start Image To Text Service/Test + +```bash +cd .. +# Start the OPEA Microservice +python lvm.py +# Test +python check_lvm.py +``` + +# 🚀1. Start Microservice with Docker (Option 2) + +## 1.2 Build Images + +### 1.2.1 LLaVA Server Image + +- Xeon CPU + +```bash +cd ../.. +docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile . +``` + +- Gaudi2 HPU + +```bash +cd ../.. +docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile_hpu . +``` + +### 1.2.2 LVM Service Image + +```bash +cd ../.. +docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/Dockerfile . 
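+
+# (Optional, illustrative check) confirm that the images built above are present locally
+# before moving on to the run steps; adjust the tags if you built with custom ones.
+docker images | grep -E 'opea/(llava|lvm)'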
+``` + +## 1.3 Start LLaVA and LVM Service + +### 1.3.1 Start LLaVA server + +- Xeon + +```bash +docker run -p 8399:8399 -e http_proxy=$http_proxy --ipc=host -e https_proxy=$https_proxy opea/llava:latest +``` + +- Gaudi2 HPU + +```bash +docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llava:latest +``` + +### 1.3.2 Start LVM service + +```bash +ip_address=$(hostname -I | awk '{print $1}') + +docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm:latest +``` + +### 1.3.3 Test + +```bash +# Use curl/python + +# curl +http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json' + +# python +python check_lvm.py +``` diff --git a/comps/lvms/__init__.py b/comps/lvms/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/lvms/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/lvms/check_lvm.py b/comps/lvms/check_lvm.py new file mode 100644 index 000000000..3f2ec34f6 --- /dev/null +++ b/comps/lvms/check_lvm.py @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import base64 +import json +from io import BytesIO + +import PIL.Image +import requests + +image_path = "https://avatars.githubusercontent.com/u/39623753?v=4" + +image = PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw) +buffered = BytesIO() +image.save(buffered, format="PNG") +img_b64_str = base64.b64encode(buffered.getvalue()).decode() + +endpoint = "http://localhost:9399/v1/lvm" +inputs = {"image": img_b64_str, "prompt": "What is this?", "max_new_tokens": 32} +response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None}) +print(response.json()) diff --git a/comps/lvms/llava/Dockerfile b/comps/lvms/llava/Dockerfile new file mode 100644 index 000000000..efd2b1d45 --- /dev/null +++ b/comps/lvms/llava/Dockerfile @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +# Set environment variables +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana + +COPY comps /home/comps + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/comps/lvms/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/lvms/llava + +ENTRYPOINT ["python", "llava_server.py", "--device", "cpu"] \ No newline at end of file diff --git a/comps/lvms/llava/Dockerfile_hpu b/comps/lvms/llava/Dockerfile_hpu new file mode 100644 index 000000000..8513dc2dd --- /dev/null +++ b/comps/lvms/llava/Dockerfile_hpu @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# HABANA environment +FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1 AS hpu +RUN rm -rf /etc/ssh/ssh_host* + +# Set environment variables +ENV LANG=en_US.UTF-8 +ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana + +COPY comps /home/comps + +# Install requirements and optimum habana +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r 
/home/comps/lvms/requirements.txt && \ + pip install optimum[habana] + +ENV PYTHONPATH=$PYTHONPATH:/home + +WORKDIR /home/comps/lvms/llava + +ENTRYPOINT ["python", "llava_server.py"] \ No newline at end of file diff --git a/comps/lvms/llava/__init__.py b/comps/lvms/llava/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/lvms/llava/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/lvms/llava/check_llava_server.py b/comps/lvms/llava/check_llava_server.py new file mode 100644 index 000000000..1c2da90c3 --- /dev/null +++ b/comps/lvms/llava/check_llava_server.py @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import base64 +import json +from io import BytesIO + +import PIL.Image +import requests + +image_path = "https://avatars.githubusercontent.com/u/39623753?v=4" + +image = PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw) +buffered = BytesIO() +image.save(buffered, format="PNG") +img_b64_str = base64.b64encode(buffered.getvalue()).decode() + +endpoint = "http://localhost:8399/generate" +inputs = {"img_b64_str": img_b64_str, "prompt": "What is this?", "max_new_tokens": 32} +response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None}) +print(response.json()) diff --git a/comps/lvms/llava/llava_server.py b/comps/lvms/llava/llava_server.py new file mode 100644 index 000000000..4e903219b --- /dev/null +++ b/comps/lvms/llava/llava_server.py @@ -0,0 +1,153 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Stand-alone LLaVA FastAPI Server.""" + +import argparse +import base64 +import time +from io import BytesIO + +import PIL.Image +import requests +import torch +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, Response, StreamingResponse +from transformers import pipeline + +model_name_or_path = None +model_dtype = None +use_hpu_graphs = True + +generator = None + + +app = FastAPI() + + +def process_image(image, max_len=1344, min_len=672): + if max(image.size) > max_len: + max_hw, min_hw = max(image.size), min(image.size) + aspect_ratio = max_hw / min_hw + shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw)) + longest_edge = int(shortest_edge * aspect_ratio) + W, H = image.size + if H > W: + H, W = longest_edge, shortest_edge + else: + H, W = shortest_edge, longest_edge + image = image.resize((W, H)) + return image + + +@app.get("/health") +async def health() -> Response: + """Health check.""" + return Response(status_code=200) + + +@app.post("/generate") +async def generate(request: Request) -> Response: # FIXME batch_size=1 for now, only accept single image + print("LLaVA generation begin.") + request_dict = await request.json() + prompt = request_dict.pop("prompt") + img_b64_str = request_dict.pop("img_b64_str") + max_new_tokens = request_dict.pop("max_new_tokens", 100) + + # format the prompt + prompt = f"\nUSER: {prompt}\nASSISTANT:" + + # Decode and Resize the image + image = PIL.Image.open(BytesIO(base64.b64decode(img_b64_str))) + image = process_image(image) + + if args.device == "hpu": + generate_kwargs = { + "lazy_mode": True, + "hpu_graphs": True, + "max_new_tokens": max_new_tokens, + "ignore_eos": False, + } + else: + generate_kwargs = { + "max_new_tokens": max_new_tokens, + } + + start = time.time() + result = generator(image, prompt=prompt, batch_size=1, 
generate_kwargs=generate_kwargs) + end = time.time() + result = result[0]["generated_text"].split("ASSISTANT: ")[-1] + print(f"LLaVA result = {result}, time = {(end-start) * 1000 }ms") + image.close() + ret = {"text": result} + return JSONResponse(ret) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=8399) + parser.add_argument("--model_name_or_path", type=str, default="llava-hf/llava-1.5-7b-hf") + parser.add_argument("--use_hpu_graphs", default=False, action="store_true") + parser.add_argument("--warmup", type=int, default=1, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--device", type=str, default="hpu") + parser.add_argument("--bf16", default=True, action="store_true") + + args = parser.parse_args() + print(f"device: {args.device}") + if args.device == "hpu": + from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi + + adapt_transformers_to_gaudi() + + if args.bf16: + model_dtype = torch.bfloat16 + else: + model_dtype = torch.float32 + + model_name_or_path = args.model_name_or_path + + generator = pipeline( + "image-to-text", + model=args.model_name_or_path, + torch_dtype=model_dtype, + device=args.device, + ) + + # warmup + print("LLaVA warmup...") + if args.device == "hpu": + generate_kwargs = { + "lazy_mode": True, + "hpu_graphs": True, + "max_new_tokens": 128, + "ignore_eos": False, + } + else: + generate_kwargs = { + "max_new_tokens": 128, + } + + if args.device == "hpu" and args.use_hpu_graphs: + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + + generator.model = wrap_in_hpu_graph(generator.model) + + image_paths = ["https://llava-vl.github.io/static/images/view.jpg"] + images = [] + for image_path in image_paths: + images.append(PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw)) + for i in range(args.warmup): + generator( + images, + prompt="\nUSER: What's the content of the image?\nASSISTANT:", + batch_size=1, + generate_kwargs=generate_kwargs, + ) + + uvicorn.run( + app, + host=args.host, + port=args.port, + log_level="debug", + ) diff --git a/comps/lvms/lvm.py b/comps/lvms/lvm.py new file mode 100644 index 000000000..a60f6813f --- /dev/null +++ b/comps/lvms/lvm.py @@ -0,0 +1,51 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import json +import os +import time + +import requests + +from comps import ( + LVMDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) + + +@register_microservice( + name="opea_service@lvm", + service_type=ServiceType.LVM, + endpoint="/v1/lvm", + host="0.0.0.0", + port=9399, + input_datatype=LVMDoc, + output_datatype=TextDoc, +) +@register_statistics(names=["opea_service@lvm"]) +async def lvm(request: LVMDoc): + start = time.time() + img_b64_str = request.image + prompt = request.prompt + max_new_tokens = request.max_new_tokens + + inputs = {"img_b64_str": img_b64_str, "prompt": prompt, "max_new_tokens": max_new_tokens} + + # forward to the LLaVA server + response = requests.post(url=f"{lvm_endpoint}/generate", data=json.dumps(inputs), proxies={"http": None}) + + statistics_dict["opea_service@lvm"].append_latency(time.time() - start, None) + return TextDoc(text=response.json()["text"]) + + +if __name__ == "__main__": + lvm_endpoint = os.getenv("LVM_ENDPOINT", "http://localhost:8399") + + print("[LVM] LVM initialized.") + 
opea_microservices["opea_service@lvm"].start() diff --git a/comps/lvms/requirements.txt b/comps/lvms/requirements.txt new file mode 100644 index 000000000..a2a605a45 --- /dev/null +++ b/comps/lvms/requirements.txt @@ -0,0 +1,10 @@ +datasets +docarray[full] +fastapi +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +optimum[habana] +pydantic==2.7.2 +pydub +shortuuid diff --git a/tests/test_lvms_llava.sh b/tests/test_lvms_llava.sh new file mode 100644 index 000000000..da7c740a9 --- /dev/null +++ b/tests/test_lvms_llava.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build -t opea/llava:latest -f comps/lvms/llava/Dockerfile . + docker build --no-cache -t opea/lvm:latest -f comps/lvms/Dockerfile . +} + +function start_service() { + unset http_proxy + docker run -d --name="test-comps-lvm-llava" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 8399:8399 --ipc=host opea/llava:latest + docker run -d --name="test-comps-lvm" -e LVM_ENDPOINT=http://$ip_address:8399 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9399:9399 --ipc=host opea/lvm:latest + sleep 8m +} + +function validate_microservice() { + result=$(http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json') + if [[ $result == *"yellow"* ]]; then + echo "Result correct." + else + echo "Result wrong." + exit 1 + fi + +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-lvm*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/test_reranks_langchain.sh b/tests/test_reranks_langchain.sh index 38db4d3fc..1a3751927 100644 --- a/tests/test_reranks_langchain.sh +++ b/tests/test_reranks_langchain.sh @@ -24,7 +24,7 @@ function start_service() { tei_service_port=5007 unset http_proxy docker run -d --name="test-comps-reranking-tei-server" -p ${tei_service_port}:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_RERANKING_ENDPOINT=$TEI_RERANKING_ENDPOINT -e HF_TOKEN=$HF_TOKEN opea/reranking-tei:comps - sleep 1m + sleep 3m } function validate_microservice() {
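
As a complement to the inlined base64 smoke test used in `tests/test_lvms_llava.sh` above, the sketch below shows one way to query the new `/v1/lvm` route with an image file from disk. It assumes the `opea/llava` and `opea/lvm` containers from this patch are already running on ports 8399/9399; `my_image.png` is a placeholder for any small local image, and `base64 -w 0` is the GNU coreutils form (on macOS, `base64 -i my_image.png` would be used instead).

```bash
# Encode a small local image (hypothetical file name) and send it to the LVM microservice.
img_b64=$(base64 -w 0 my_image.png)

http_proxy="" curl http://localhost:9399/v1/lvm \
  -X POST \
  -H 'Content-Type: application/json' \
  -d "{\"image\": \"${img_b64}\", \"prompt\": \"What is shown in this image?\", \"max_new_tokens\": 64}"
```

If the services are healthy, the response is a JSON body whose `text` field carries the model's answer, mirroring what `check_lvm.py` prints.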