diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/text-generation/tgi/llm.py index edde95c1d..e267c21dc 100644 --- a/comps/llms/text-generation/tgi/llm.py +++ b/comps/llms/text-generation/tgi/llm.py @@ -5,7 +5,7 @@ import time from fastapi.responses import StreamingResponse -from langchain_community.llms import HuggingFaceEndpoint +from huggingface_hub import AsyncInferenceClient from langsmith import traceable from comps import ( @@ -28,26 +28,23 @@ ) @traceable(run_type="llm") @register_statistics(names=["opea_service@llm_tgi"]) -def llm_generate(input: LLMParamsDoc): +async def llm_generate(input: LLMParamsDoc): + stream_gen_time = [] start = time.time() - llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") - llm = HuggingFaceEndpoint( - endpoint_url=llm_endpoint, - max_new_tokens=input.max_new_tokens, - top_k=input.top_k, - top_p=input.top_p, - typical_p=input.typical_p, - temperature=input.temperature, - repetition_penalty=input.repetition_penalty, - streaming=input.streaming, - timeout=600, - ) if input.streaming: - stream_gen_time = [] async def stream_generator(): chat_response = "" - async for text in llm.astream(input.query): + text_generation = await llm.text_generation( + prompt=input.query, + stream=input.streaming, + max_new_tokens=input.max_new_tokens, + repetition_penalty=input.repetition_penalty, + temperature=input.temperature, + top_k=input.top_k, + top_p=input.top_p, + ) + async for text in text_generation: stream_gen_time.append(time.time() - start) chat_response += text chunk_repr = repr(text.encode("utf-8")) @@ -59,10 +56,23 @@ async def stream_generator(): return StreamingResponse(stream_generator(), media_type="text/event-stream") else: - response = llm.invoke(input.query) + response = await llm.text_generation( + prompt=input.query, + stream=input.streaming, + max_new_tokens=input.max_new_tokens, + repetition_penalty=input.repetition_penalty, + temperature=input.temperature, + top_k=input.top_k, + top_p=input.top_p, + ) statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) return GeneratedDoc(text=response, prompt=input.query) if __name__ == "__main__": + llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") + llm = AsyncInferenceClient( + model=llm_endpoint, + timeout=600, + ) opea_microservices["opea_service@llm_tgi"].start() diff --git a/comps/llms/text-generation/tgi/requirements.txt b/comps/llms/text-generation/tgi/requirements.txt index 92c652351..e87736e7f 100644 --- a/comps/llms/text-generation/tgi/requirements.txt +++ b/comps/llms/text-generation/tgi/requirements.txt @@ -1,7 +1,6 @@ docarray[full] fastapi huggingface_hub -langchain==0.1.16 langsmith opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/reranks/requirements.txt b/comps/reranks/requirements.txt index 103349907..638625d99 100644 --- a/comps/reranks/requirements.txt +++ b/comps/reranks/requirements.txt @@ -1,6 +1,5 @@ docarray[full] fastapi -langchain langsmith opentelemetry-api opentelemetry-exporter-otlp diff --git a/comps/reranks/langchain/__init__.py b/comps/reranks/tei/__init__.py similarity index 100% rename from comps/reranks/langchain/__init__.py rename to comps/reranks/tei/__init__.py diff --git a/comps/reranks/langchain/docker/Dockerfile b/comps/reranks/tei/docker/Dockerfile similarity index 88% rename from comps/reranks/langchain/docker/Dockerfile rename to comps/reranks/tei/docker/Dockerfile index 2daa22698..24949c85f 100644 --- a/comps/reranks/langchain/docker/Dockerfile +++ 
b/comps/reranks/tei/docker/Dockerfile @@ -27,7 +27,7 @@ RUN pip install --no-cache-dir --upgrade pip && \ ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/reranks/langchain +WORKDIR /home/user/comps/reranks/tei -ENTRYPOINT ["python", "reranking_tei_xeon.py"] +ENTRYPOINT ["python", "reranking_tei.py"] diff --git a/comps/reranks/langchain/docker/docker_compose_reranking.yaml b/comps/reranks/tei/docker/docker_compose_reranking.yaml similarity index 100% rename from comps/reranks/langchain/docker/docker_compose_reranking.yaml rename to comps/reranks/tei/docker/docker_compose_reranking.yaml diff --git a/comps/reranks/langchain/local_reranking.py b/comps/reranks/tei/local_reranking.py similarity index 100% rename from comps/reranks/langchain/local_reranking.py rename to comps/reranks/tei/local_reranking.py diff --git a/comps/reranks/langchain/reranking_tei_xeon.py b/comps/reranks/tei/reranking_tei.py similarity index 67% rename from comps/reranks/langchain/reranking_tei_xeon.py rename to comps/reranks/tei/reranking_tei.py index 88bf0ddad..1beaa83f7 100644 --- a/comps/reranks/langchain/reranking_tei_xeon.py +++ b/comps/reranks/tei/reranking_tei.py @@ -8,7 +8,6 @@ import time import requests -from langchain_core.prompts import ChatPromptTemplate from langsmith import traceable from comps import ( @@ -48,14 +47,23 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc: context_str = context_str + " " + input.retrieved_docs[best_response["index"]].text if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3: # chinese context - template = "仅基于以下背景回答问题:\n{context}\n问题: {question}" + template = """ +### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。 +### 搜索结果:{context} +### 问题:{question} +### 回答: +""" else: - template = """Answer the question based only on the following context: - {context} - Question: {question} - """ - prompt = ChatPromptTemplate.from_template(template) - final_prompt = prompt.format(context=context_str, question=input.initial_query) + template = """ +### You are a helpful, respectful and honest assistant to help the user with questions. \ +Please refer to the search results obtained from the local knowledge base. \ +But be careful to not incorporate the information that you think is not relevant to the question. \ +If you don't know the answer to a question, please don't share false information. 
\ +### Search results: {context} \n +### Question: {question} \n +### Answer: +""" + final_prompt = template.format(context=context_str, question=input.initial_query) statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None) return LLMParamsDoc(query=final_prompt.strip()) else: diff --git a/comps/retrievers/README.md b/comps/retrievers/langchain/README.md similarity index 84% rename from comps/retrievers/README.md rename to comps/retrievers/langchain/README.md index f6bbb01ea..3de5cab21 100644 --- a/comps/retrievers/README.md +++ b/comps/retrievers/langchain/README.md @@ -8,12 +8,12 @@ Overall, this microservice provides robust backend support for applications requ # Retriever Microservice with Redis -For details, please refer to this [readme](langchain/redis/README.md) +For details, please refer to this [readme](redis/README.md) # Retriever Microservice with Milvus -For details, please refer to this [readme](langchain/milvus/README.md) +For details, please refer to this [readme](milvus/README.md) # Retriever Microservice with PGVector -For details, please refer to this [readme](langchain/pgvector/README.md) +For details, please refer to this [readme](pgvector/README.md) diff --git a/comps/retrievers/langchain/pinecone/docker/Dockerfile b/comps/retrievers/langchain/pinecone/docker/Dockerfile index 018b63354..7eedfab10 100644 --- a/comps/retrievers/langchain/pinecone/docker/Dockerfile +++ b/comps/retrievers/langchain/pinecone/docker/Dockerfile @@ -20,7 +20,7 @@ RUN chmod +x /home/user/comps/retrievers/langchain/pinecone/run.sh USER user RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/retrievers/requirements.txt + pip install --no-cache-dir -r /home/user/comps/retrievers/langchain/pinecone/requirements.txt ENV PYTHONPATH=$PYTHONPATH:/home/user diff --git a/comps/retrievers/llamaindex/README.md b/comps/retrievers/llamaindex/README.md new file mode 100644 index 000000000..3f6db8899 --- /dev/null +++ b/comps/retrievers/llamaindex/README.md @@ -0,0 +1,97 @@ +# Retriever Microservice + +This retriever microservice is a highly efficient search service designed for handling and retrieving embedding vectors. It operates by receiving an embedding vector as input and conducting a similarity search against vectors stored in a VectorDB database. Users must specify the VectorDB's URL and the index name, and the service searches within that index to find documents with the highest similarity to the input vector. + +The service primarily utilizes similarity measures in vector space to rapidly retrieve contextually similar documents. The vector-based retrieval approach is particularly suited for handling large datasets, offering fast and accurate search results that significantly enhance the efficiency and quality of information retrieval. + +Overall, this microservice provides robust backend support for applications requiring efficient similarity searches, playing a vital role in scenarios such as recommendation systems, information retrieval, or any other context where precise measurement of document similarity is crucial. + +# 🚀1. Start Microservice with Python (Option 1) + +To start the retriever microservice, you must first install the required Python packages. + +## 1.1 Install Requirements + +```bash +pip install -r requirements.txt +``` + +## 1.2 Setup VectorDB Service + +You need to set up your own VectorDB service (Redis in this example), and ingest your knowledge documents into the vector database.
+ +As for Redis, you could start a docker container using the following commands. +Remember to ingest data into it manually. + +```bash +docker run -d --name="redis-vector-db" -p 6379:6379 -p 8001:8001 redis/redis-stack:7.2.0-v9 +``` + +And then ingest data into the Redis VectorDB using the methods described in the dataprep microservice. + +## 1.3 Start Retriever Service + +```bash +python retriever_redis.py +``` + +# 🚀2. Start Microservice with Docker (Option 2) + +## 2.1 Setup Environment Variables + +```bash +export REDIS_URL="redis://${your_ip}:6379" +export INDEX_NAME=${your_index_name} +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=${your_langchain_api_key} +export LANGCHAIN_PROJECT="opea/retrievers" +``` + +## 2.2 Build Docker Image + +```bash +cd ../../ +docker build -t opea/retriever-redis-llamaindex:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/llamaindex/docker/Dockerfile . +``` + +To start a docker container, you have two options: + +- A. Run Docker with CLI +- B. Run Docker with Docker Compose + +You can choose one as needed. + +## 2.3 Run Docker with CLI (Option A) + +```bash +docker run -d --name="retriever-redis-server" -p 7000:7000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME opea/retriever-redis-llamaindex:latest +``` + +## 2.4 Run Docker with Docker Compose (Option B) + +```bash +cd llamaindex/docker +docker compose -f docker_compose_retriever.yaml up -d +``` + +# 🚀3. Consume Retriever Service + +## 3.1 Check Service Status + +```bash +curl http://localhost:7000/v1/health_check \ + -X GET \ + -H 'Content-Type: application/json' +``` + +## 3.2 Consume Retriever Service + +To consume the Retriever Microservice, you can generate a mock embedding vector of length 768 with Python.
+ +```bash +your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://${your_ip}:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' +``` diff --git a/comps/retrievers/llamaindex/__init__.py b/comps/retrievers/llamaindex/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/comps/retrievers/llamaindex/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/retrievers/llamaindex/docker/Dockerfile b/comps/retrievers/llamaindex/docker/Dockerfile new file mode 100644 index 000000000..7d9cd64be --- /dev/null +++ b/comps/retrievers/llamaindex/docker/Dockerfile @@ -0,0 +1,27 @@ + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM ubuntu:22.04 + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY comps /home/user/comps + +USER user + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/comps/retrievers/llamaindex/requirements.txt + +ENV PYTHONPATH=$PYTHONPATH:/home/user + +WORKDIR /home/user/comps/retrievers/llamaindex + +ENTRYPOINT ["python", "retriever_redis.py"] diff --git a/comps/retrievers/llamaindex/docker/docker_compose_retriever.yaml b/comps/retrievers/llamaindex/docker/docker_compose_retriever.yaml new file mode 100644 index 000000000..0c9fd9991 --- /dev/null +++ b/comps/retrievers/llamaindex/docker/docker_compose_retriever.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +version: "3.8" + +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "6379:6379" + - "8001:8001" + retriever: + image: opea/retriever-redis:latest + container_name: retriever-redis-server + ports: + - "7000:7000" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/comps/retrievers/llamaindex/redis_config.py b/comps/retrievers/llamaindex/redis_config.py new file mode 100644 index 000000000..93946fcef --- /dev/null +++ b/comps/retrievers/llamaindex/redis_config.py @@ -0,0 +1,77 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + + +def get_boolean_env_var(var_name, default_value=False): + """Retrieve the boolean value of an environment variable. + + Args: + var_name (str): The name of the environment variable to retrieve. + default_value (bool): The default value to return if the variable + is not found. + + Returns: + bool: The value of the environment variable, interpreted as a boolean. 
+ """ + true_values = {"true", "1", "t", "y", "yes"} + false_values = {"false", "0", "f", "n", "no"} + + # Retrieve the environment variable's value + value = os.getenv(var_name, "").lower() + + # Decide the boolean value based on the content of the string + if value in true_values: + return True + elif value in false_values: + return False + else: + return default_value + + +# Whether or not to enable langchain debugging +DEBUG = get_boolean_env_var("DEBUG", False) +# Set DEBUG env var to "true" if you wish to enable LC debugging module +if DEBUG: + import langchain + + langchain.debug = True + + +# Embedding model +EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5") + + +# Redis Connection Information +REDIS_HOST = os.getenv("REDIS_HOST", "localhost") +REDIS_PORT = int(os.getenv("REDIS_PORT", 6379)) + + +def format_redis_conn_from_env(): + redis_url = os.getenv("REDIS_URL", None) + if redis_url: + return redis_url + else: + using_ssl = get_boolean_env_var("REDIS_SSL", False) + start = "rediss://" if using_ssl else "redis://" + + # if using RBAC + password = os.getenv("REDIS_PASSWORD", None) + username = os.getenv("REDIS_USERNAME", "default") + if password is not None: + start += f"{username}:{password}@" + + return start + f"{REDIS_HOST}:{REDIS_PORT}" + + +REDIS_URL = format_redis_conn_from_env() + +# Vector Index Configuration +INDEX_NAME = os.getenv("INDEX_NAME", "rag-redis") + + +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(current_file_path) +REDIS_SCHEMA = os.getenv("REDIS_SCHEMA", "redis_schema.yml") +INDEX_SCHEMA = os.path.join(parent_dir, REDIS_SCHEMA) diff --git a/comps/retrievers/llamaindex/requirements.txt b/comps/retrievers/llamaindex/requirements.txt new file mode 100644 index 000000000..c73476e4d --- /dev/null +++ b/comps/retrievers/llamaindex/requirements.txt @@ -0,0 +1,13 @@ +docarray[full] +easyocr +fastapi +langsmith +llama-index-vector-stores-redis +llama_index +opentelemetry-api +opentelemetry-exporter-otlp +opentelemetry-sdk +pymupdf +redis +sentence_transformers +shortuuid diff --git a/comps/retrievers/llamaindex/retriever_redis.py b/comps/retrievers/llamaindex/retriever_redis.py new file mode 100644 index 000000000..965aecd88 --- /dev/null +++ b/comps/retrievers/llamaindex/retriever_redis.py @@ -0,0 +1,59 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from langsmith import traceable +from llama_index.core.vector_stores.types import VectorStoreQuery +from llama_index.vector_stores.redis import RedisVectorStore +from redis_config import INDEX_NAME, REDIS_URL +from redisvl.schema import IndexSchema + +from comps import EmbedDoc768, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice + +tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") + + +@register_microservice( + name="opea_service@retriever_redis", + service_type=ServiceType.RETRIEVER, + endpoint="/v1/retrieval", + host="0.0.0.0", + port=7000, +) +@traceable(run_type="retriever") +def retrieve(input: EmbedDoc768) -> SearchedDoc: + vector_store_query = VectorStoreQuery(query_embedding=input.embedding) + search_res = vector_store.query(query=vector_store_query) + searched_docs = [] + for node, id, similarity in zip(search_res.nodes, search_res.ids, search_res.similarities): + searched_docs.append(TextDoc(text=node.get_content())) + result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) + return result + + +if __name__ == "__main__": + custom_schema = 
IndexSchema.from_dict( + { + "index": {"name": INDEX_NAME, "prefix": "doc"}, + "fields": [ + {"name": "id", "type": "tag"}, + {"name": "doc_id", "type": "tag"}, + {"name": "text", "type": "text"}, + {"name": "content", "type": "text"}, + {"name": "source", "type": "text"}, + {"name": "start_index", "type": "numeric"}, + { + "name": "vector", + "type": "vector", + "attrs": {"dims": 768, "algorithm": "HNSW", "datatype": "FLOAT32"}, + }, + ], + } + ) + + vector_store = RedisVectorStore( + schema=custom_schema, + redis_url=REDIS_URL, + ) + opea_microservices["opea_service@retriever_redis"].start() diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index e08a885a9..6e62098aa 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -9,7 +9,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH - docker build --no-cache -t opea/llm-tgi:comps -f comps/llms/text-generation/tgi/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/llm-tgi:comps -f comps/llms/text-generation/tgi/Dockerfile . } function start_service() { @@ -17,7 +17,7 @@ function start_service() { export your_hf_llm_model="Intel/neural-chat-7b-v3-3" # Remember to set HF_TOKEN before invoking this test! export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} + docker run -d --name="test-comps-llm-tgi-endpoint" -e https_proxy -e http_proxy -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" tei_service_port=5005 @@ -41,7 +41,7 @@ function validate_microservice() { tei_service_port=5005 http_proxy="" curl http://${ip_address}:${tei_service_port}/v1/chat/completions \ -X POST \ - -d '{"query":"What is Deep Learning?"}' \ + -d '{"query":"What is Deep Learning?", "max_new_tokens": 128}' \ -H 'Content-Type: application/json' docker logs test-comps-llm-tgi-endpoint docker logs test-comps-llm-tgi-server
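For reference, a minimal sketch (not part of the patch) of the `huggingface_hub` `AsyncInferenceClient` call pattern that the updated `llm.py` relies on; the endpoint URL, prompt, and `max_new_tokens` value here are assumptions for local testing against a TGI server.

```python
# Illustrative sketch only; mirrors the streaming / non-streaming split used in llm.py above.
# The endpoint URL ("http://localhost:8080"), prompt, and max_new_tokens are assumed values.
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main():
    client = AsyncInferenceClient(model="http://localhost:8080", timeout=600)

    # Non-streaming: awaiting text_generation returns the full generated string.
    text = await client.text_generation(prompt="What is Deep Learning?", max_new_tokens=64)
    print(text)

    # Streaming: with stream=True, awaiting returns an async iterator of text chunks,
    # which is what stream_generator() consumes before wrapping it in a StreamingResponse.
    chunks = await client.text_generation(prompt="What is Deep Learning?", stream=True, max_new_tokens=64)
    async for chunk in chunks:
        print(chunk, end="", flush=True)


if __name__ == "__main__":
    asyncio.run(main())
```

Both calls accept the same sampling parameters (temperature, top_k, top_p, repetition_penalty) that `llm_generate` forwards from `LLMParamsDoc`.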