From 5748471ca6b0300e8ac41230e347659e435f110f Mon Sep 17 00:00:00 2001 From: gadmarkovits Date: Mon, 24 Jun 2024 12:09:26 +0300 Subject: [PATCH] Reranking using an optimized bi-encoder (#219) Signed-off-by: gadmarkovits --- comps/__init__.py | 1 + comps/cores/proto/docarray.py | 5 ++ comps/reranks/fastrag/README.md | 69 ++++++++++++++++++++++ comps/reranks/fastrag/__init__.py | 2 + comps/reranks/fastrag/config.py | 7 +++ comps/reranks/fastrag/docker/Dockerfile | 35 +++++++++++ comps/reranks/fastrag/local_reranking.py | 38 ++++++++++++ comps/reranks/fastrag/requirements.txt | 10 ++++ comps/reranks/langchain/local_reranking.py | 4 +- tests/test_reranks_fastrag.sh | 50 ++++++++++++++++ 10 files changed, 219 insertions(+), 2 deletions(-) create mode 100644 comps/reranks/fastrag/README.md create mode 100644 comps/reranks/fastrag/__init__.py create mode 100644 comps/reranks/fastrag/config.py create mode 100644 comps/reranks/fastrag/docker/Dockerfile create mode 100644 comps/reranks/fastrag/local_reranking.py create mode 100644 comps/reranks/fastrag/requirements.txt create mode 100644 tests/test_reranks_fastrag.sh diff --git a/comps/__init__.py b/comps/__init__.py index 6f753aaeb..d4021e2c2 100644 --- a/comps/__init__.py +++ b/comps/__init__.py @@ -13,6 +13,7 @@ GeneratedDoc, LLMParamsDoc, SearchedDoc, + RerankedDoc, TextDoc, RAGASParams, RAGASScores, diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py index 035b5edbe..d98ea6959 100644 --- a/comps/cores/proto/docarray.py +++ b/comps/cores/proto/docarray.py @@ -69,6 +69,11 @@ class GeneratedDoc(BaseDoc): prompt: str +class RerankedDoc(BaseDoc): + reranked_docs: DocList[TextDoc] + initial_query: str + + class LLMParamsDoc(BaseDoc): query: str max_new_tokens: int = 1024 diff --git a/comps/reranks/fastrag/README.md b/comps/reranks/fastrag/README.md new file mode 100644 index 000000000..fac481860 --- /dev/null +++ b/comps/reranks/fastrag/README.md @@ -0,0 +1,69 @@ +# Reranking Microservice + +The 
Reranking Microservice, fueled by reranking models, stands as a straightforward yet immensely potent tool for semantic search. When provided with a query and a collection of documents, reranking swiftly indexes the documents based on their semantic relevance to the query, arranging them from most to least pertinent. This microservice significantly enhances overall accuracy. In a text retrieval system, either a dense embedding model or a sparse lexical search index is often employed to retrieve relevant text documents based on the input. However, a reranking model can further refine this process by rearranging potential candidates into a final, optimized order. + +# 🚀1. Start Microservice with Python (Option 1) + +To start the Reranking microservice, you must first install the required python packages. + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +## 1.2 Install fastRAG + +```bash +git clone https://github.com/IntelLabs/fastRAG.git +cd fastRAG +pip install . +pip install .[intel] +``` + +## 1.3 Start Reranking Service with Python Script + +```bash +export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" +python local_reranking.py +``` + +# 🚀2. Start Microservice with Docker (Option 2) + +## 2.1 Setup Environment Variables + +```bash +export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" +``` + +## 2.2 Build Docker Image + +```bash +cd ../../ +docker build -t opea/reranking-fastrag:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/fastrag/docker/Dockerfile . +``` + +## 2.3 Run Docker + +```bash +docker run -d --name="reranking-fastrag-server" -p 8000:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:latest +``` + +# 🚀3. 
Consume Reranking Service + +## 3.1 Check Service Status + +```bash +curl http://localhost:8000/v1/health_check \ + -X GET \ + -H 'Content-Type: application/json' +``` + +## 3.2 Consume Reranking Service + +```bash +curl http://localhost:8000/v1/reranking \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' +``` diff --git a/comps/reranks/fastrag/__init__.py b/comps/reranks/fastrag/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/reranks/fastrag/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/reranks/fastrag/config.py b/comps/reranks/fastrag/config.py new file mode 100644 index 000000000..70a6fde8b --- /dev/null +++ b/comps/reranks/fastrag/config.py @@ -0,0 +1,7 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +# Re-ranking model +RANKER_MODEL = os.getenv("EMBED_MODEL", "Intel/bge-small-en-v1.5-rag-int8-static") diff --git a/comps/reranks/fastrag/docker/Dockerfile b/comps/reranks/fastrag/docker/Dockerfile new file mode 100644 index 000000000..9f3a62982 --- /dev/null +++ b/comps/reranks/fastrag/docker/Dockerfile @@ -0,0 +1,35 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.10-slim + +ENV LANG C.UTF-8 + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim \ + git + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +USER user + +COPY comps /home/user/comps + +RUN git clone https://github.com/IntelLabs/fastRAG.git /home/user/fastRAG && \ + cd /home/user/fastRAG && \ + pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/reranks/fastrag/requirements.txt && \ 
+ pip install . && \ + pip install .[intel] + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/reranks/fastrag + +ENTRYPOINT ["python", "local_reranking.py"] + diff --git a/comps/reranks/fastrag/local_reranking.py b/comps/reranks/fastrag/local_reranking.py new file mode 100644 index 000000000..f11ebc1f9 --- /dev/null +++ b/comps/reranks/fastrag/local_reranking.py @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from config import RANKER_MODEL +from fastrag.rankers import IPEXBiEncoderSimilarityRanker +from haystack import Document +from langsmith import traceable + +from comps.cores.mega.micro_service import ServiceType, opea_microservices, register_microservice +from comps.cores.proto.docarray import RerankedDoc, SearchedDoc, TextDoc + + +@register_microservice( + name="opea_service@local_reranking", + service_type=ServiceType.RERANK, + endpoint="/v1/reranking", + host="0.0.0.0", + port=8000, + input_datatype=SearchedDoc, + output_datatype=RerankedDoc, +) +@traceable(run_type="llm") +def reranking(input: SearchedDoc) -> RerankedDoc: + documents = [] + for i, d in enumerate(input.retrieved_docs): + documents.append(Document(content=d.text, id=(i + 1))) + sorted_documents = reranker_model.run(input.initial_query, documents)["documents"] + ranked_documents = [TextDoc(id=doc.id, text=doc.content) for doc in sorted_documents] + res = RerankedDoc(initial_query=input.initial_query, reranked_docs=ranked_documents) + return res + + +if __name__ == "__main__": + # Use an optimized quantized bi-encoder model for re-ranking + reranker_model = IPEXBiEncoderSimilarityRanker(RANKER_MODEL) + reranker_model.warm_up() + + opea_microservices["opea_service@local_reranking"].start() diff --git a/comps/reranks/fastrag/requirements.txt b/comps/reranks/fastrag/requirements.txt new file mode 100644 index 000000000..5cf7a7c44 --- /dev/null +++ b/comps/reranks/fastrag/requirements.txt @@ -0,0 +1,10 @@ +docarray[full] 
+fastapi +haystack-ai +langchain +langsmith +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +sentence_transformers +shortuuid diff --git a/comps/reranks/langchain/local_reranking.py b/comps/reranks/langchain/local_reranking.py index f3a505599..d0fa8a79c 100644 --- a/comps/reranks/langchain/local_reranking.py +++ b/comps/reranks/langchain/local_reranking.py @@ -4,7 +4,7 @@ from langsmith import traceable from sentence_transformers import CrossEncoder -from comps import RerankedDoc, SearchedDoc, ServiceType, opea_microservices, register_microservice +from comps import RerankedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice @register_microservice( @@ -21,7 +21,7 @@ def reranking(input: SearchedDoc) -> RerankedDoc: query_and_docs = [(input.initial_query, doc.text) for doc in input.retrieved_docs] scores = reranker_model.predict(query_and_docs) first_passage = sorted(list(zip(input.retrieved_docs, scores)), key=lambda x: x[1], reverse=True)[0][0] - res = RerankedDoc(query=input.query, doc=first_passage) + res = RerankedDoc(initial_query=input.initial_query, reranked_docs=[first_passage]) return res diff --git a/tests/test_reranks_fastrag.sh b/tests/test_reranks_fastrag.sh new file mode 100644 index 000000000..d423d19d5 --- /dev/null +++ b/tests/test_reranks_fastrag.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') +function build_docker_images() { + cd $WORKPATH + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/reranking-fastrag:comps -f comps/reranks/fastrag/docker/Dockerfile . 
+} + +function start_service() { + export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static" + fastrag_service_port=8000 + unset http_proxy + docker run -d --name="test-comps-reranking-fastrag-server" -p ${fastrag_service_port}:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:comps + sleep 3m +} + +function validate_microservice() { + fastrag_service_port=8000 + http_proxy="" curl http://${ip_address}:${fastrag_service_port}/v1/reranking\ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' + docker logs test-comps-reranking-fastrag-server +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-rerank*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main