feat: Improve LFQA Web Example #5504

Merged: 3 commits, Aug 4, 2023
examples/web_lfqa_improved.py (49 changes: 35 additions, 14 deletions)
@@ -1,5 +1,6 @@
 import logging
 import os
+from typing import Dict, Any
 
 from haystack import Pipeline
 from haystack.nodes import PromptNode, PromptTemplate, TopPSampler
@@ -11,19 +12,26 @@
 if not search_key:
     raise ValueError("Please set the SERPERDEV_API_KEY environment variable")
 
-openai_key = os.environ.get("OPENAI_API_KEY")
-if not openai_key:
-    raise ValueError("Please set the OPENAI_API_KEY environment variable")
-
+models_config: Dict[str, Any] = {
+    "openai": {"api_key": os.environ.get("OPENAI_API_KEY"), "model_name": "gpt-3.5-turbo"},
+    "anthropic": {"api_key": os.environ.get("ANTHROPIC_API_KEY"), "model_name": "claude-instant-1"},
+    "hf": {"api_key": os.environ.get("HF_API_KEY"), "model_name": "tiiuae/falcon-7b-instruct"},
+}
 prompt_text = """
-Synthesize a comprehensive answer from the following most relevant paragraphs and the given question.
-Provide a clear and concise response that summarizes the key points and information presented in the paragraphs.
-Your answer should be in your own words and be no longer than 50 words.
-\n\n Paragraphs: {documents} \n\n Question: {query} \n\n Answer:
+Synthesize a comprehensive answer from the provided paragraphs and the given question.\n
+Answer in full sentences and paragraphs, don't use bullet points or lists.\n
+If the answer includes multiple chronological events, order them chronologically.\n
+\n\n Paragraphs: {join(documents)} \n\n Question: {query} \n\n Answer:
 """
 
+stream = True
+model: Dict[str, str] = models_config["openai"]
 prompt_node = PromptNode(
-    "gpt-3.5-turbo", default_prompt_template=PromptTemplate(prompt_text), api_key=openai_key, max_length=768
+    model["model_name"],
+    default_prompt_template=PromptTemplate(prompt_text),
+    api_key=model["api_key"],
+    max_length=768,
+    model_kwargs={"stream": stream},
 )
 
 web_retriever = WebRetriever(api_key=search_key, top_search_results=5, mode="preprocessed_documents", top_k=50)
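
(Illustration only, not part of the diff: a minimal sketch of how the models_config introduced above could be pointed at a different backend. It assumes the corresponding API key, here ANTHROPIC_API_KEY, is exported, and it reuses the same PromptNode arguments as the example; the inline template string is only a placeholder.)

import os
from typing import Any, Dict

from haystack.nodes import PromptNode, PromptTemplate

models_config: Dict[str, Any] = {
    "openai": {"api_key": os.environ.get("OPENAI_API_KEY"), "model_name": "gpt-3.5-turbo"},
    "anthropic": {"api_key": os.environ.get("ANTHROPIC_API_KEY"), "model_name": "claude-instant-1"},
    "hf": {"api_key": os.environ.get("HF_API_KEY"), "model_name": "tiiuae/falcon-7b-instruct"},
}

# Pick a backend by key; "anthropic" is used here purely as an example.
model = models_config["anthropic"]
if not model["api_key"]:
    raise ValueError("Please set the API key for the selected model")

# Same PromptNode construction as in the diff, parameterized by the selected entry.
prompt_node = PromptNode(
    model["model_name"],
    default_prompt_template=PromptTemplate("Paragraphs: {join(documents)} Question: {query} Answer:"),
    api_key=model["api_key"],
    max_length=768,
    model_kwargs={"stream": True},
)

Selecting the "openai" or "hf" entries works the same way; only the api_key and model_name values change.
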
@@ -39,12 +47,25 @@
 pipeline.add_node(component=litm_ranker, name="LostInTheMiddleRanker", inputs=["DiversityRanker"])
 pipeline.add_node(component=prompt_node, name="PromptNode", inputs=["LostInTheMiddleRanker"])
 
-logger = logging.getLogger("boilerpy3")
-logger.setLevel(logging.CRITICAL)
+logging.disable(logging.CRITICAL)
+
+
+questions = [
+    "What are the main reasons for long-standing animosities between Russia and Poland?",
+    "What are the primary causes and effects of climate change on global and local scales?",
+    "What were the key events and influences that led to Renaissance; how did these developments "
+    "shape modern Western culture?",
+    "How have advances in technology in the 21st century affected job markets and economies around the world?",
+    "What are the main reasons behind the Israel-Palestine conflict and how have they evolved over time?",
+    "How has the European Union influenced the political, economic, and social dynamics of Europe?",
+]
 
-questions = ["What are the reasons for long-standing animosities between Russia and Poland?"]
+print(f"\nRunning pipeline with {model['model_name']}\n")
 
 for q in questions:
-    print(f"Question: {q}")
+    print(f"\nQuestion: {q}")
+    if stream:
+        print("Answer:")
     response = pipeline.run(query=q)
-    print(f"Answer: {response['results'][0]}")
+    if not stream:
+        print(f"Answer: {response['results'][0]}")