Support llamaindex for retrieval microservice and remove langchain de…

…pendency for llm and rerank microservice (#152) * remove langchain dependency for llm and rerank Signed-off-by: lvliang-intel <[email protected]> * add llamaindex support for retrieval Signed-off-by: lvliang-intel <[email protected]> * fix schema issue Signed-off-by: lvliang-intel <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix dockerfile Signed-off-by: lvliang-intel <[email protected]> * update readme Signed-off-by: lvliang-intel <[email protected]> * update reamde Signed-off-by: lvliang-intel <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix entrypoint Signed-off-by: lvliang-intel <[email protected]> * add dataprep process in test script Signed-off-by: lvliang-intel <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix redis url for dataprep Signed-off-by: lvliang-intel <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update readme Signed-off-by: lvliang-intel <[email protected]> * update code Signed-off-by: lvliang-intel <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: lvliang-intel <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: root <[email protected]>
opea-project · Jul 9, 2024 · 61795fd · 61795fd
1 parent 9b658f4
commit 61795fd
Show file tree

Hide file tree

Showing 18 changed files with 356 additions and 36 deletions.
diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/text-generation/tgi/llm.py
@@ -5,7 +5,7 @@
 import time
 
 from fastapi.responses import StreamingResponse
-from langchain_community.llms import HuggingFaceEndpoint
+from huggingface_hub import AsyncInferenceClient
 from langsmith import traceable
 
 from comps import (
@@ -28,26 +28,23 @@
 )
 @traceable(run_type="llm")
 @register_statistics(names=["opea_service@llm_tgi"])
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
+    stream_gen_time = []
     start = time.time()
-    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
-    llm = HuggingFaceEndpoint(
-        endpoint_url=llm_endpoint,
-        max_new_tokens=input.max_new_tokens,
-        top_k=input.top_k,
-        top_p=input.top_p,
-        typical_p=input.typical_p,
-        temperature=input.temperature,
-        repetition_penalty=input.repetition_penalty,
-        streaming=input.streaming,
-        timeout=600,
-    )
     if input.streaming:
-        stream_gen_time = []
 
         async def stream_generator():
             chat_response = ""
-            async for text in llm.astream(input.query):
+            text_generation = await llm.text_generation(
+                prompt=input.query,
+                stream=input.streaming,
+                max_new_tokens=input.max_new_tokens,
+                repetition_penalty=input.repetition_penalty,
+                temperature=input.temperature,
+                top_k=input.top_k,
+                top_p=input.top_p,
+            )
+            async for text in text_generation:
                 stream_gen_time.append(time.time() - start)
                 chat_response += text
                 chunk_repr = repr(text.encode("utf-8"))
@@ -59,10 +56,23 @@ async def stream_generator():
 
         return StreamingResponse(stream_generator(), media_type="text/event-stream")
     else:
-        response = llm.invoke(input.query)
+        response = await llm.text_generation(
+            prompt=input.query,
+            stream=input.streaming,
+            max_new_tokens=input.max_new_tokens,
+            repetition_penalty=input.repetition_penalty,
+            temperature=input.temperature,
+            top_k=input.top_k,
+            top_p=input.top_p,
+        )
         statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None)
         return GeneratedDoc(text=response, prompt=input.query)
 
 
 if __name__ == "__main__":
+    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
+    llm = AsyncInferenceClient(
+        model=llm_endpoint,
+        timeout=600,
+    )
     opea_microservices["opea_service@llm_tgi"].start()
diff --git a/comps/llms/text-generation/tgi/requirements.txt b/comps/llms/text-generation/tgi/requirements.txt
@@ -1,7 +1,6 @@
 docarray[full]
 fastapi
 huggingface_hub
-langchain==0.1.16
 langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp

diff --git a/comps/reranks/requirements.txt b/comps/reranks/requirements.txt
@@ -1,6 +1,5 @@
 docarray[full]
 fastapi
-langchain
 langsmith
 opentelemetry-api
 opentelemetry-exporter-otlp

diff --git a/comps/reranks/langchain/__init__.py → comps/reranks/tei/__init__.py b/comps/reranks/langchain/__init__.py → comps/reranks/tei/__init__.py
diff --git a/comps/reranks/langchain/docker/Dockerfile → comps/reranks/tei/docker/Dockerfile b/comps/reranks/langchain/docker/Dockerfile → comps/reranks/tei/docker/Dockerfile
@@ -27,7 +27,7 @@ RUN pip install --no-cache-dir --upgrade pip && \
 
 ENV PYTHONPATH=$PYTHONPATH:/home/user
 
-WORKDIR /home/user/comps/reranks/langchain
+WORKDIR /home/user/comps/reranks/tei
 
-ENTRYPOINT ["python", "reranking_tei_xeon.py"]
+ENTRYPOINT ["python", "reranking_tei.py"]
 
diff --git a/...hain/docker/docker_compose_reranking.yaml → .../tei/docker/docker_compose_reranking.yaml b/...hain/docker/docker_compose_reranking.yaml → .../tei/docker/docker_compose_reranking.yaml
diff --git a/comps/reranks/langchain/local_reranking.py → comps/reranks/tei/local_reranking.py b/comps/reranks/langchain/local_reranking.py → comps/reranks/tei/local_reranking.py
diff --git a/...s/reranks/langchain/reranking_tei_xeon.py → comps/reranks/tei/reranking_tei.py b/...s/reranks/langchain/reranking_tei_xeon.py → comps/reranks/tei/reranking_tei.py
@@ -8,7 +8,6 @@
 import time
 
 import requests
-from langchain_core.prompts import ChatPromptTemplate
 from langsmith import traceable
 
 from comps import (
@@ -48,14 +47,23 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc:
             context_str = context_str + " " + input.retrieved_docs[best_response["index"]].text
         if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
             # chinese context
-            template = "仅基于以下背景回答问题:\n{context}\n问题: {question}"
+            template = """
+### 你将扮演一个乐于助人、尊重他人并诚实的助手，你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案，请避免分享不准确的信息。
+### 搜索结果：{context}
+### 问题：{question}
+### 回答：
+"""
         else:
-            template = """Answer the question based only on the following context:
-    {context}
-    Question: {question}
-            """
-        prompt = ChatPromptTemplate.from_template(template)
-        final_prompt = prompt.format(context=context_str, question=input.initial_query)
+            template = """
+### You are a helpful, respectful and honest assistant to help the user with questions. \
+Please refer to the search results obtained from the local knowledge base. \
+But be careful to not incorporate the information that you think is not relevant to the question. \
+If you don't know the answer to a question, please don't share false information. \
+### Search results: {context} \n
+### Question: {question} \n
+### Answer:
+"""
+        final_prompt = template.format(context=context_str, question=input.initial_query)
         statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None)
         return LLMParamsDoc(query=final_prompt.strip())
     else:

diff --git a/comps/retrievers/README.md → comps/retrievers/langchain/README.md b/comps/retrievers/README.md → comps/retrievers/langchain/README.md
@@ -8,12 +8,12 @@ Overall, this microservice provides robust backend support for applications requ
 
 # Retriever Microservice with Redis
 
-For details, please refer to this [readme](langchain/redis/README.md)
+For details, please refer to this [readme](redis/README.md)
 
 # Retriever Microservice with Milvus
 
-For details, please refer to this [readme](langchain/milvus/README.md)
+For details, please refer to this [readme](milvus/README.md)
 
 # Retriever Microservice with PGVector
 
-For details, please refer to this [readme](langchain/pgvector/README.md)
+For details, please refer to this [readme](pgvector/README.md)
diff --git a/comps/retrievers/langchain/pinecone/docker/Dockerfile b/comps/retrievers/langchain/pinecone/docker/Dockerfile
@@ -20,7 +20,7 @@ RUN chmod +x /home/user/comps/retrievers/langchain/pinecone/run.sh
 USER user
 
 RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r /home/user/comps/retrievers/requirements.txt
+    pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/pinecone/requirements.txt
 
 ENV PYTHONPATH=$PYTHONPATH:/home/user
 

diff --git a/comps/retrievers/llamaindex/README.md b/comps/retrievers/llamaindex/README.md
@@ -0,0 +1,97 @@
+# Retriever Microservice
+
+This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector.
+
+The service primarily uses similarity measures in vector space to rapidly retrieve documents with similar content. The vector-based retrieval approach is particularly suited to handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval.
+
+Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial.
+
+# 🚀1. Start Microservice with Python (Option 1)
+
+To start the retriever microservice, you must first install the required python packages.
+
+## 1.1 Install Requirements
+
+```bash
+pip install -r requirements.txt
+```
+
+## 1.2 Setup VectorDB Service
+
+You need to set up your own VectorDB service (Redis in this example) and ingest your knowledge documents into the vector database.
+
+For Redis, you can start a Docker container using the following command.
+Remember to ingest data into it manually.
+
+```bash
+docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9
+```
+
+And then ingest data into the Redis VectorDB using the methods described in the dataprep microservice.
+
+## 1.3 Start Retriever Service
+
+```bash
+python retriever_redis.py
+```
+
+# 🚀2. Start Microservice with Docker (Option 2)
+
+## 2.1 Setup Environment Variables
+
+```bash
+export REDIS_URL="redis://${your_ip}:6379"
+export INDEX_NAME=${your_index_name}
+export LANGCHAIN_TRACING_V2=true
+export LANGCHAIN_API_KEY=${your_langchain_api_key}
+export LANGCHAIN_PROJECT="opea/retrievers"
+```
+
+## 2.2 Build Docker Image
+
+```bash
+cd ../../../
+docker build -t opea/retriever-redis-llamaindex:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/llamaindex/docker/Dockerfile .
+```
+
+To start a docker container, you have two options:
+
+- A. Run Docker with CLI
+- B. Run Docker with Docker Compose
+
+You can choose one as needed.
+
+## 2.3 Run Docker with CLI (Option A)
+
+```bash
+docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis-llamaindex:latest
+```
+
+## 2.4 Run Docker with Docker Compose (Option B)
+
+```bash
+cd llamaindex/docker
+docker compose -f docker_compose_retriever.yaml up -d
+```
+
+# 🚀3. Consume Retriever Service
+
+## 3.1 Check Service Status
+
+```bash
+curl http://localhost:7000/v1/health_check \
+  -X GET \
+  -H 'Content-Type: application/json'
+```
+
+## 3.2 Consume Retriever Service
+
+To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python.
+
+```bash
+your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
+curl http://${your_ip}:7000/v1/retrieval \
+  -X POST \
+  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
+  -H 'Content-Type: application/json'
+```
diff --git a/comps/retrievers/llamaindex/__init__.py b/comps/retrievers/llamaindex/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/retrievers/llamaindex/docker/Dockerfile b/comps/retrievers/llamaindex/docker/Dockerfile
@@ -0,0 +1,27 @@
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM ubuntu:22.04
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev \
+    vim
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+COPY comps /home/user/comps
+
+USER user
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/user/comps/retrievers/llamaindex/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user
+
+WORKDIR /home/user/comps/retrievers/llamaindex
+
+ENTRYPOINT ["python", "retriever_redis.py"]
diff --git a/comps/retrievers/llamaindex/docker/docker_compose_retriever.yaml b/comps/retrievers/llamaindex/docker/docker_compose_retriever.yaml
@@ -0,0 +1,29 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+version: "3.8"
+
+services:
+  redis-vector-db:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-vector-db
+    ports:
+      - "6379:6379"
+      - "8001:8001"
+  retriever:
+    image: opea/retriever-redis:latest
+    container_name: retriever-redis-server
+    ports:
+      - "7000:7000"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: ${REDIS_URL}
+      INDEX_NAME: ${INDEX_NAME}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/retrievers/llamaindex/redis_config.py b/comps/retrievers/llamaindex/redis_config.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+
+def get_boolean_env_var(var_name, default_value=False):
+    """Retrieve the boolean value of an environment variable.
+
+    Args:
+    var_name (str): The name of the environment variable to retrieve.
+    default_value (bool): The default value to return if the variable
+    is not found.
+
+    Returns:
+    bool: The value of the environment variable, interpreted as a boolean.
+    """
+    true_values = {"true", "1", "t", "y", "yes"}
+    false_values = {"false", "0", "f", "n", "no"}
+
+    # Retrieve the environment variable's value
+    value = os.getenv(var_name, "").lower()
+
+    # Decide the boolean value based on the content of the string
+    if value in true_values:
+        return True
+    elif value in false_values:
+        return False
+    else:
+        return default_value
+
+
+# Whether or not to enable langchain debugging
+DEBUG = get_boolean_env_var("DEBUG", False)
+# Set DEBUG env var to "true" if you wish to enable LC debugging module
+if DEBUG:
+    import langchain
+
+    langchain.debug = True
+
+
+# Embedding model
+EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")
+
+
+# Redis Connection Information
+REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
+REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
+
+
+def format_redis_conn_from_env():
+    redis_url = os.getenv("REDIS_URL", None)
+    if redis_url:
+        return redis_url
+    else:
+        using_ssl = get_boolean_env_var("REDIS_SSL", False)
+        start = "rediss://" if using_ssl else "redis://"
+
+        # if using RBAC
+        password = os.getenv("REDIS_PASSWORD", None)
+        username = os.getenv("REDIS_USERNAME", "default")
+        if password is not None:
+            start += f"{username}:{password}@"
+
+        return start + f"{REDIS_HOST}:{REDIS_PORT}"
+
+
+REDIS_URL = format_redis_conn_from_env()
+
+# Vector Index Configuration
+INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis")
+
+
+current_file_path = os.path.abspath(__file__)
+parent_dir = os.path.dirname(current_file_path)
+REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml")
+INDEX_SCHEMA = os.path.join(parent_dir, REDIS_SCHEMA)