Add LVMs LLaVA component (#181)

* add img2txt microservice Signed-off-by: Spycsh <[email protected]> * fix * rem * fix * fix * debug * add debug * add standalone llava server to avoid tokenization in diff process issue * add another test * endpoint * debug * rem * fix * disable hpu graph * dockernize * add test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rem nohup * fix * refactor img2txt to lvms * Update test_reranks_langchain.sh * fix test name * rename * lower wait time --------- Signed-off-by: Spycsh <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: chen, suyue <[email protected]>
opea-project · Jun 19, 2024 · bd385be · bd385be
1 parent 16c5fdf
commit bd385be
Show file tree

Hide file tree

Showing 17 changed files with 498 additions and 3 deletions.
diff --git a/comps/__init__.py b/comps/__init__.py
@@ -16,6 +16,7 @@
     TextDoc,
     RAGASParams,
     RAGASScores,
+    LVMDoc,
 )
 
 # Constants

diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py
@@ -26,6 +26,7 @@ class ServiceType(Enum):
     DATAPREP = 9
     UNDEFINED = 10
     RAGAS = 11
+    LVM = 12
 
 
 class MegaServiceEndpoint(Enum):

diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py
@@ -7,7 +7,7 @@
 from docarray import BaseDoc, DocList
 from docarray.documents import AudioDoc
 from docarray.typing import AudioUrl
-from pydantic import Field, conlist
+from pydantic import Field, conint, conlist
 
 
 class TextDoc(BaseDoc):
@@ -102,3 +102,9 @@ class RAGASScores(BaseDoc):
     faithfulness: float
     context_recallL: float
     context_precision: float
+
+
+# Request schema for the LVM (large visual model) microservice.
+class LVMDoc(BaseDoc):
+    # Input image as a string; the bundled check script sends base64-encoded PNG bytes.
+    image: str
+    # Text prompt (question) to answer about the image.
+    prompt: str
+    # Validated to the range 0..1024, default 512.
+    # Presumably the cap on generated tokens - confirm against the serving code.
+    max_new_tokens: conint(ge=0, le=1024) = 512
diff --git a/comps/embeddings/README.md b/comps/embeddings/README.md
@@ -155,6 +155,6 @@ curl http://localhost:6000/v1/health_check\
 ```bash
 curl http://localhost:6000/v1/embeddings\
   -X POST \
-  -d '{"text":"Hello, world!"}' \
+  -d '{"input":"Hello, world!"}' \
   -H 'Content-Type: application/json'
 ```
diff --git a/comps/lvms/Dockerfile b/comps/lvms/Dockerfile
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Slim Python base image for the LVM microservice container.
+FROM python:3.11-slim
+
+# Set environment variables
+ENV LANG=en_US.UTF-8
+
+# Copy the whole `comps` package into the image under /home.
+COPY comps /home/comps
+
+# Upgrade pip, then install the LVM requirements without keeping pip's cache in the layer.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/comps/lvms/requirements.txt
+
+# Put /home on the import path so `comps` (at /home/comps) is importable as a top-level package.
+ENV PYTHONPATH=$PYTHONPATH:/home
+
+WORKDIR /home/comps/lvms
+
+# Run the microservice entry script from the working directory.
+ENTRYPOINT ["python", "lvm.py"]
diff --git a/comps/lvms/README.md b/comps/lvms/README.md
@@ -0,0 +1,111 @@
+# LVM Microservice
+
+Visual Question Answering (VQA) is one of the multimodal tasks enabled by LVMs (Large Visual Models). This microservice supports visual Q&A using LLaVA as the base large visual model. It accepts two inputs, a prompt and an image, and returns the answer to the prompt about the image.
+
+# 🚀1. Start Microservice with Python (Option 1)
+
+## 1.1 Install Requirements
+
+```bash
+pip install -r requirements.txt
+```
+
+## 1.2 Start LLaVA Service/Test
+
+- Xeon CPU
+
+```bash
+# Start LLaVA service
+cd llava/
+nohup python llava_server.py --device=cpu &
+# Wait until the server is up
+# Test
+python check_llava_server.py
+```
+
+- Gaudi2 HPU
+
+```bash
+pip install optimum[habana]
+```
+
+```bash
+cd llava/
+# Start LLaVA service
+nohup python llava_server.py &
+# Test
+python check_llava_server.py
+```
+
+## 1.3 Start LVM Service/Test
+
+```bash
+cd ..
+# Start the OPEA Microservice
+python lvm.py
+# Test
+python check_lvm.py
+```
+
+# 🚀2. Start Microservice with Docker (Option 2)
+
+## 2.1 Build Images
+
+### 2.1.1 LLaVA Server Image
+
+- Xeon CPU
+
+```bash
+cd ../..
+docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile .
+```
+
+- Gaudi2 HPU
+
+```bash
+cd ../..
+docker build -t opea/llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/llava/Dockerfile_hpu .
+```
+
+### 2.1.2 LVM Service Image
+
+```bash
+cd ../..
+docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/Dockerfile .
+```
+
+## 2.2 Start LLaVA and LVM Service
+
+### 2.2.1 Start LLaVA server
+
+- Xeon
+
+```bash
+docker run -p 8399:8399 -e http_proxy=$http_proxy --ipc=host -e https_proxy=$https_proxy opea/llava:latest
+```
+
+- Gaudi2 HPU
+
+```bash
+docker run -p 8399:8399 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/llava:latest
+```
+
+### 2.2.2 Start LVM service
+
+```bash
+ip_address=$(hostname -I | awk '{print $1}')
+
+docker run -p 9399:9399 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e LVM_ENDPOINT=http://$ip_address:8399 opea/lvm:latest
+```
+
+### 2.2.3 Test
+
+```bash
+# Use curl/python
+
+# curl
+http_proxy="" curl http://localhost:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json'
+
+# python
+python check_lvm.py
+```
diff --git a/comps/lvms/__init__.py b/comps/lvms/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/lvms/check_lvm.py b/comps/lvms/check_lvm.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import base64
+import json
+from io import BytesIO
+
+import PIL.Image
+import requests
+
+# Smoke test for the LVM microservice: download a sample image, re-encode it
+# as a base64 PNG string, and POST it with a prompt to the local /v1/lvm endpoint.
+image_path = "https://avatars.githubusercontent.com/u/39623753?v=4"
+
+# Check the download status first so a failed fetch is reported as an HTTP
+# error rather than as an opaque PIL decoding failure.
+image_response = requests.get(image_path, stream=True, timeout=3000)
+image_response.raise_for_status()
+image = PIL.Image.open(image_response.raw)
+buffered = BytesIO()
+image.save(buffered, format="PNG")
+img_b64_str = base64.b64encode(buffered.getvalue()).decode()
+
+endpoint = "http://localhost:9399/v1/lvm"
+inputs = {"image": img_b64_str, "prompt": "What is this?", "max_new_tokens": 32}
+# proxies={"http": None} is meant to keep the localhost call off any configured HTTP proxy.
+# The explicit timeout stops the script from blocking forever if the service is not running.
+response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None}, timeout=3000)
+# Surface HTTP-level failures (e.g. request validation errors) before parsing the body.
+response.raise_for_status()
+print(response.json())
diff --git a/comps/lvms/llava/Dockerfile b/comps/lvms/llava/Dockerfile
@@ -0,0 +1,19 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Slim Python base image for the CPU build of the standalone LLaVA server.
+FROM python:3.11-slim
+
+# Set environment variables
+ENV LANG=en_US.UTF-8
+# NOTE(review): the habanalabs / optimum-habana entries look carried over from the
+# HPU image; confirm whether the CPU image needs them.
+ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana
+
+# Copy the whole `comps` package into the image under /home.
+COPY comps /home/comps
+
+# Upgrade pip, then install the shared LVM requirements without keeping pip's cache in the layer.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/comps/lvms/requirements.txt
+
+# Append /home so `comps` (at /home/comps) is importable as a top-level package.
+ENV PYTHONPATH=$PYTHONPATH:/home
+
+WORKDIR /home/comps/lvms/llava
+
+# Start the LLaVA server, explicitly selecting the CPU device.
+ENTRYPOINT ["python", "llava_server.py", "--device", "cpu"]
diff --git a/comps/lvms/llava/Dockerfile_hpu b/comps/lvms/llava/Dockerfile_hpu
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# HABANA environment
+# Habana Gaudi base image (release 1.14.0, Ubuntu 22.04, PyTorch 2.1.1 installer).
+FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1 AS hpu
+# Remove any SSH host keys present in the base image.
+RUN rm -rf /etc/ssh/ssh_host*
+
+# Set environment variables
+ENV LANG=en_US.UTF-8
+ENV PYTHONPATH=/home/user:/usr/lib/habanalabs/:/optimum-habana
+
+# Copy the whole `comps` package into the image under /home.
+COPY comps /home/comps
+
+# Install requirements and optimum habana.
+# Every pip call uses --no-cache-dir so the download cache is not baked into the layer.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/comps/lvms/requirements.txt && \
+    pip install --no-cache-dir optimum[habana]
+
+# Append /home so `comps` (at /home/comps) is importable as a top-level package.
+ENV PYTHONPATH=$PYTHONPATH:/home
+
+WORKDIR /home/comps/lvms/llava
+
+# Start the LLaVA server with its default device (no --device override here).
+ENTRYPOINT ["python", "llava_server.py"]
diff --git a/comps/lvms/llava/__init__.py b/comps/lvms/llava/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/lvms/llava/check_llava_server.py b/comps/lvms/llava/check_llava_server.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import base64
+import json
+from io import BytesIO
+
+import PIL.Image
+import requests
+
+# Smoke test for the standalone LLaVA server: download a sample image, re-encode
+# it as a base64 PNG string, and POST it with a prompt to the local /generate endpoint.
+image_path = "https://avatars.githubusercontent.com/u/39623753?v=4"
+
+# Check the download status first so a failed fetch is reported as an HTTP
+# error rather than as an opaque PIL decoding failure.
+image_response = requests.get(image_path, stream=True, timeout=3000)
+image_response.raise_for_status()
+image = PIL.Image.open(image_response.raw)
+buffered = BytesIO()
+image.save(buffered, format="PNG")
+img_b64_str = base64.b64encode(buffered.getvalue()).decode()
+
+endpoint = "http://localhost:8399/generate"
+inputs = {"img_b64_str": img_b64_str, "prompt": "What is this?", "max_new_tokens": 32}
+# proxies={"http": None} is meant to keep the localhost call off any configured HTTP proxy.
+# The explicit timeout stops the script from blocking forever if the server is not running.
+response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None}, timeout=3000)
+# Surface HTTP-level failures before parsing the body as JSON.
+response.raise_for_status()
+print(response.json())