diff --git a/comps/dataprep/README.md b/comps/dataprep/README.md
index f7d445db4..62768bde2 100644
--- a/comps/dataprep/README.md
+++ b/comps/dataprep/README.md
@@ -2,10 +2,22 @@
 
 The Dataprep Microservice aims to preprocess the data from various sources (either structured or unstructured data) to text data, and convert the text data to embedding vectors then store them in the database.
 
+## Use LVM (Large Vision Model) for Summarizing Image Data
+
+Occasionally, unstructured data will contain image data. To convert the image data to text data, an LVM can be used to summarize the image. To leverage the LVM, please refer to this [readme](../lvms/README.md) to start the LVM microservice first, and then set the environment variable below before starting any dataprep microservice.
+
+```bash
+export SUMMARIZE_IMAGE_VIA_LVM=1
+```
+
 # Dataprep Microservice with Redis
 
 For details, please refer to this [readme](redis/README.md)
 
+# Dataprep Microservice with Milvus
+
+For details, please refer to this [readme](milvus/README.md)
+
 # Dataprep Microservice with Qdrant
 
 For details, please refer to this [readme](qdrant/README.md)
diff --git a/comps/dataprep/utils.py b/comps/dataprep/utils.py
index 629571236..b4144cdf7 100644
--- a/comps/dataprep/utils.py
+++ b/comps/dataprep/utils.py
@@ -1,6 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import base64
 import errno
 import functools
 import io
@@ -198,6 +199,20 @@ def load_csv(input_path):
 
 def load_image(image_path):
     """Load the image file."""
+    # When SUMMARIZE_IMAGE_VIA_LVM is "1", ask the LVM microservice for a text
+    # summary of the image and return it, bypassing UnstructuredImageLoader.
+    if os.getenv("SUMMARIZE_IMAGE_VIA_LVM", None) == "1":
+        query = "Please summarize this image."
+        # Read through a context manager so the file handle is always closed.
+        with open(image_path, "rb") as image_file:
+            image_b64_str = base64.b64encode(image_file.read()).decode()
+        response = requests.post(
+            "http://localhost:9399/v1/lvm",
+            data=json.dumps({"image": image_b64_str, "prompt": query}),
+            headers={"Content-Type": "application/json"},
+            proxies={"http": None},
+        )
+        return response.json()["text"].strip()
     loader = UnstructuredImageLoader(image_path)
     text = loader.load()[0].page_content
     return text
@@ -239,7 +254,12 @@ def document_loader(doc_path):
         return load_xlsx(doc_path)
     elif doc_path.endswith(".csv"):
         return load_csv(doc_path)
-    elif doc_path.endswith(".tiff"):
+    elif (
+        doc_path.endswith(".tiff")
+        or doc_path.endswith(".jpg")
+        or doc_path.endswith(".jpeg")
+        or doc_path.endswith(".png")
+    ):
         return load_image(doc_path)
     elif doc_path.endswith(".svg"):
         return load_image(doc_path)