Skip to content

Commit

Permalink
Support llamaindex for retrieval microservice and remove langchain de…
Browse files Browse the repository at this point in the history
…pendency for llm and rerank microservice (#152)

* remove langchain dependency for llm and rerank

Signed-off-by: lvliang-intel <[email protected]>

* add llamaindex support for retrieval

Signed-off-by: lvliang-intel <[email protected]>

* fix schema issue

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix dockerfile

Signed-off-by: lvliang-intel <[email protected]>

* update readme

Signed-off-by: lvliang-intel <[email protected]>

* update reamde

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix entrypoint

Signed-off-by: lvliang-intel <[email protected]>

* add dataprep process in test script

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix redis url for dataprep

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update readme

Signed-off-by: lvliang-intel <[email protected]>

* update code

Signed-off-by: lvliang-intel <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: lvliang-intel <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: root <[email protected]>
  • Loading branch information
3 people committed Jul 9, 2024
1 parent 9b658f4 commit 61795fd
Show file tree
Hide file tree
Showing 18 changed files with 356 additions and 36 deletions.
44 changes: 27 additions & 17 deletions comps/llms/text-generation/tgi/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import time

from fastapi.responses import StreamingResponse
from langchain_community.llms import HuggingFaceEndpoint
from huggingface_hub import AsyncInferenceClient
from langsmith import traceable

from comps import (
Expand All @@ -28,26 +28,23 @@
)
@traceable(run_type="llm")
@register_statistics(names=["opea_service@llm_tgi"])
def llm_generate(input: LLMParamsDoc):
async def llm_generate(input: LLMParamsDoc):
stream_gen_time = []
start = time.time()
llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
llm = HuggingFaceEndpoint(
endpoint_url=llm_endpoint,
max_new_tokens=input.max_new_tokens,
top_k=input.top_k,
top_p=input.top_p,
typical_p=input.typical_p,
temperature=input.temperature,
repetition_penalty=input.repetition_penalty,
streaming=input.streaming,
timeout=600,
)
if input.streaming:
stream_gen_time = []

async def stream_generator():
chat_response = ""
async for text in llm.astream(input.query):
text_generation = await llm.text_generation(
prompt=input.query,
stream=input.streaming,
max_new_tokens=input.max_new_tokens,
repetition_penalty=input.repetition_penalty,
temperature=input.temperature,
top_k=input.top_k,
top_p=input.top_p,
)
async for text in text_generation:
stream_gen_time.append(time.time() - start)
chat_response += text
chunk_repr = repr(text.encode("utf-8"))
Expand All @@ -59,10 +56,23 @@ async def stream_generator():

return StreamingResponse(stream_generator(), media_type="text/event-stream")
else:
response = llm.invoke(input.query)
response = await llm.text_generation(
prompt=input.query,
stream=input.streaming,
max_new_tokens=input.max_new_tokens,
repetition_penalty=input.repetition_penalty,
temperature=input.temperature,
top_k=input.top_k,
top_p=input.top_p,
)
statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None)
return GeneratedDoc(text=response, prompt=input.query)


if __name__ == "__main__":
llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
llm = AsyncInferenceClient(
model=llm_endpoint,
timeout=600,
)
opea_microservices["opea_service@llm_tgi"].start()
1 change: 0 additions & 1 deletion comps/llms/text-generation/tgi/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
docarray[full]
fastapi
huggingface_hub
langchain==0.1.16
langsmith
opentelemetry-api
opentelemetry-exporter-otlp
Expand Down
1 change: 0 additions & 1 deletion comps/reranks/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
docarray[full]
fastapi
langchain
langsmith
opentelemetry-api
opentelemetry-exporter-otlp
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RUN pip install --no-cache-dir --upgrade pip && \

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/reranks/langchain
WORKDIR /home/user/comps/reranks/tei

ENTRYPOINT ["python", "reranking_tei_xeon.py"]
ENTRYPOINT ["python", "reranking_tei.py"]

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import time

import requests
from langchain_core.prompts import ChatPromptTemplate
from langsmith import traceable

from comps import (
Expand Down Expand Up @@ -48,14 +47,23 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc:
context_str = context_str + " " + input.retrieved_docs[best_response["index"]].text
if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
# chinese context
template = "仅基于以下背景回答问题:\n{context}\n问题: {question}"
template = """
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
### 搜索结果:{context}
### 问题:{question}
### 回答:
"""
else:
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
final_prompt = prompt.format(context=context_str, question=input.initial_query)
template = """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \
### Search results: {context} \n
### Question: {question} \n
### Answer:
"""
final_prompt = template.format(context=context_str, question=input.initial_query)
statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None)
return LLMParamsDoc(query=final_prompt.strip())
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ Overall, this microservice provides robust backend support for applications requ

# Retriever Microservice with Redis

For details, please refer to this [readme](langchain/redis/README.md)
For details, please refer to this [readme](redis/README.md)

# Retriever Microservice with Milvus

For details, please refer to this [readme](langchain/milvus/README.md)
For details, please refer to this [readme](milvus/README.md)

# Retriever Microservice with PGVector

For details, please refer to this [readme](langchain/pgvector/README.md)
For details, please refer to this [readme](pgvector/README.md)
2 changes: 1 addition & 1 deletion comps/retrievers/langchain/pinecone/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ RUN chmod +x /home/user/comps/retrievers/langchain/pinecone/run.sh
USER user

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/retrievers/requirements.txt
pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/pinecone/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

Expand Down
97 changes: 97 additions & 0 deletions comps/retrievers/llamaindex/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Retriever Microservice

This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector.

The service primarily uses similarity measures in vector space to rapidly retrieve documents with similar content. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval.

Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial.

# 🚀1. Start Microservice with Python (Option 1)

To start the retriever microservice, you must first install the required python packages.

## 1.1 Install Requirements

```bash
pip install -r requirements.txt
```

## 1.2 Setup VectorDB Service

You need to set up your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database.

As for Redis, you could start a docker container using the following commands.
Remember to ingest data into it manually.

```bash
docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9
```

And then ingest data into the Redis VectorDB using the methods described in the dataprep microservice.

## 1.3 Start Retriever Service

```bash
python retriever_redis.py
```

# 🚀2. Start Microservice with Docker (Option 2)

## 2.1 Setup Environment Variables

```bash
export REDIS_URL="redis://${your_ip}:6379"
export INDEX_NAME=${your_index_name}
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=${your_langchain_api_key}
export LANGCHAIN_PROJECT="opea/retrievers"
```

## 2.2 Build Docker Image

```bash
cd ../../
docker build -t opea/retriever-redis-llamaindex:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/llamaindex/docker/Dockerfile .
```

To start a docker container, you have two options:

- A. Run Docker with CLI
- B. Run Docker with Docker Compose

You can choose one as needed.

## 2.3 Run Docker with CLI (Option A)

```bash
docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis-llamaindex:latest
```

## 2.4 Run Docker with Docker Compose (Option B)

```bash
cd llamaindex/docker
docker compose -f docker_compose_retriever.yaml up -d
```

# 🚀3. Consume Retriever Service

## 3.1 Check Service Status

```bash
curl http://localhost:7000/v1/health_check \
-X GET \
-H 'Content-Type: application/json'
```

## 3.2 Consume Retriever Service

To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python.

```bash
your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
curl http://${your_ip}:7000/v1/retrieval \
-X POST \
-d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
-H 'Content-Type: application/json'
```
2 changes: 2 additions & 0 deletions comps/retrievers/llamaindex/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
27 changes: 27 additions & 0 deletions comps/retrievers/llamaindex/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Image for the llamaindex-based Redis retriever microservice.
FROM ubuntu:22.04

# The stock ubuntu image ships without a Python interpreter or pip, so both
# are installed here; the pip install step and the entrypoint depend on them.
# The apt package lists are removed afterwards to keep the layer small.
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    python3 \
    python3-pip \
    vim && \
    rm -rf /var/lib/apt/lists/*

# Run the service as an unprivileged user.
RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

COPY comps /home/user/comps

USER user

# Invoke pip through the interpreter so the step does not depend on a `pip`
# executable being on PATH for the unprivileged user.
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir -r /home/user/comps/retrievers/llamaindex/requirements.txt

# Make the `comps` package importable from /home/user.
ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/retrievers/llamaindex

# Ubuntu provides `python3`, not `python`, unless python-is-python3 is installed.
ENTRYPOINT ["python3", "retriever_redis.py"]
29 changes: 29 additions & 0 deletions comps/retrievers/llamaindex/docker/docker_compose_retriever.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Starts a Redis Stack vector database together with the llamaindex-based
# retriever microservice.

version: "3.8"

services:
  redis-vector-db:
    image: redis/redis-stack:7.2.0-v9
    container_name: redis-vector-db
    ports:
      - "6379:6379"
      - "8001:8001"
  retriever:
    # Must match the tag produced by the README build step
    # (docker build -t opea/retriever-redis-llamaindex:latest ...).
    image: opea/retriever-redis-llamaindex:latest
    container_name: retriever-redis-server
    ports:
      - "7000:7000"
    ipc: host
    environment:
      # All values are taken from the invoking shell's environment.
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      REDIS_URL: ${REDIS_URL}
      INDEX_NAME: ${INDEX_NAME}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge
77 changes: 77 additions & 0 deletions comps/retrievers/llamaindex/redis_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os


def get_boolean_env_var(var_name, default_value=False):
    """Retrieve the boolean value of an environment variable.

    The raw value is compared case-insensitively, ignoring leading and
    trailing whitespace, against fixed sets of truthy and falsy spellings.

    Args:
        var_name (str): The name of the environment variable to retrieve.
        default_value (bool): The value to return if the variable is unset,
            empty, or holds a string that is not a recognized spelling.

    Returns:
        bool: The value of the environment variable, interpreted as a boolean.
    """
    true_values = {"true", "1", "t", "y", "yes"}
    false_values = {"false", "0", "f", "n", "no"}

    # Strip before comparing so that values carrying stray whitespace (for
    # example "true " from a shell export or an env file) are still
    # recognized instead of silently falling back to the default.
    value = os.getenv(var_name, "").strip().lower()

    if value in true_values:
        return True
    if value in false_values:
        return False
    return default_value


# Whether or not to enable langchain debugging
DEBUG = get_boolean_env_var("DEBUG", False)
# Set DEBUG env var to "true" if you wish to enable LC debugging module
if DEBUG:
    try:
        import langchain
    except ImportError:
        # NOTE(review): this config belongs to the llamaindex retriever, where
        # langchain is presumably not a required dependency. Do not let a
        # debug flag make the whole module fail to import; warn instead.
        import warnings

        warnings.warn(
            "DEBUG is enabled but langchain is not installed; "
            "langchain debugging was not activated.",
            stacklevel=1,
        )
    else:
        langchain.debug = True


# Embedding model name; override with the EMBED_MODEL environment variable.
EMBED_MODEL = os.environ.get("EMBED_MODEL", "BAAI/bge-base-en-v1.5")


# Redis host and port; override with REDIS_HOST / REDIS_PORT.
REDIS_HOST = os.environ.get("REDIS_HOST", "localhost")
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))


def format_redis_conn_from_env():
    """Build the Redis connection URL from environment variables.

    A non-empty ``REDIS_URL`` is returned unchanged. Otherwise the URL is
    assembled from ``REDIS_SSL`` (selects the ``rediss://`` scheme),
    ``REDIS_USERNAME`` (defaults to ``"default"``), ``REDIS_PASSWORD`` and the
    module-level ``REDIS_HOST`` / ``REDIS_PORT``. Credentials are included only
    when ``REDIS_PASSWORD`` is set.

    Returns:
        str: The Redis connection URL.
    """
    redis_url = os.getenv("REDIS_URL", None)
    if redis_url:
        return redis_url

    # Local import keeps this block self-contained.
    from urllib.parse import quote

    using_ssl = get_boolean_env_var("REDIS_SSL", False)
    scheme = "rediss://" if using_ssl else "redis://"

    # if using RBAC
    password = os.getenv("REDIS_PASSWORD", None)
    username = os.getenv("REDIS_USERNAME", "default")
    credentials = ""
    if password is not None:
        # Percent-encode the credentials: characters such as "@", ":", "/"
        # or "#" would otherwise be parsed as URL delimiters and yield a
        # malformed connection string.
        credentials = f"{quote(username, safe='')}:{quote(password, safe='')}@"

    return f"{scheme}{credentials}{REDIS_HOST}:{REDIS_PORT}"


# Redis connection URL, computed once when this module is imported.
REDIS_URL = format_redis_conn_from_env()

# Name of the vector index; override with the INDEX_NAME environment variable.
INDEX_NAME = os.environ.get("INDEX_NAME", "rag-redis")


# The schema file name (REDIS_SCHEMA) is joined onto the directory that
# contains this module to form INDEX_SCHEMA.
current_file_path = os.path.abspath(__file__)
parent_dir = os.path.split(current_file_path)[0]
REDIS_SCHEMA = os.environ.get("REDIS_SCHEMA", "redis_schema.yml")
INDEX_SCHEMA = os.path.join(parent_dir, REDIS_SCHEMA)
Loading

0 comments on commit 61795fd

Please sign in to comment.