
Commit

Improve web_lfqa example
vblagoje committed Aug 3, 2023
1 parent 56cea8c commit f9af1e1
Showing 1 changed file with 35 additions and 14 deletions.
49 changes: 35 additions & 14 deletions examples/web_lfqa_improved.py
@@ -1,5 +1,6 @@
import logging
import os
+from typing import Dict, Any

from haystack import Pipeline
from haystack.nodes import PromptNode, PromptTemplate, TopPSampler
@@ -11,19 +12,26 @@
if not search_key:
    raise ValueError("Please set the SERPERDEV_API_KEY environment variable")

-openai_key = os.environ.get("OPENAI_API_KEY")
-if not openai_key:
-    raise ValueError("Please set the OPENAI_API_KEY environment variable")
-
+models_config: Dict[str, Any] = {
+    "openai": {"api_key": os.environ.get("OPENAI_API_KEY"), "model_name": "gpt-3.5-turbo"},
+    "anthropic": {"api_key": os.environ.get("ANTHROPIC_API_KEY"), "model_name": "claude-instant-1"},
+    "hf": {"api_key": os.environ.get("HF_API_KEY"), "model_name": "tiiuae/falcon-7b-instruct"},
+}
prompt_text = """
-Synthesize a comprehensive answer from the following most relevant paragraphs and the given question.
-Provide a clear and concise response that summarizes the key points and information presented in the paragraphs.
-Your answer should be in your own words and be no longer than 50 words.
-\n\n Paragraphs: {documents} \n\n Question: {query} \n\n Answer:
+Synthesize a comprehensive answer from the provided paragraphs and the given question.\n
+Answer in full sentences and paragraphs, don't use bullet points or lists.\n
+If the answer includes multiple chronological events, order them chronologically.\n
+\n\n Paragraphs: {join(documents)} \n\n Question: {query} \n\n Answer:
"""

+stream = True
+model: Dict[str, str] = models_config["openai"]
prompt_node = PromptNode(
-    "gpt-3.5-turbo", default_prompt_template=PromptTemplate(prompt_text), api_key=openai_key, max_length=768
+    model["model_name"],
+    default_prompt_template=PromptTemplate(prompt_text),
+    api_key=model["api_key"],
+    max_length=768,
+    model_kwargs={"stream": stream},
)

web_retriever = WebRetriever(api_key=search_key, top_search_results=5, mode="preprocessed_documents", top_k=50)
@@ -39,12 +47,25 @@
pipeline.add_node(component=litm_ranker, name="LostInTheMiddleRanker", inputs=["DiversityRanker"])
pipeline.add_node(component=prompt_node, name="PromptNode", inputs=["LostInTheMiddleRanker"])

logger = logging.getLogger("boilerpy3")
logger.setLevel(logging.CRITICAL)
+logging.basicConfig(level=logging.CRITICAL)

+questions = [
+    "What are the main reasons for long-standing animosities between Russia and Poland?",
+    "What are the primary causes and effects of climate change on global and local scales?",
+    "What were the key events and influences that led to Renaissance; how did these developments "
+    "shape modern Western culture?",
+    "How have advances in technology in the 21st century affected job markets and economies around the world?",
+    "What are the main reasons behind the Israel-Palestine conflict and how have they evolved over time?",
+    "How has the European Union influenced the political, economic, and social dynamics of Europe?",
+]
-questions = ["What are the reasons for long-standing animosities between Russia and Poland?"]
+print(f"\nRunning pipeline with {model['model_name']}\n")

for q in questions:
-    print(f"Question: {q}")
+    print(f"\nQuestion: {q}")
+    if stream:
+        print("Answer:")
    response = pipeline.run(query=q)
-    print(f"Answer: {response['results'][0]}")
+    if not stream:
+        print(f"Answer: {response['results'][0]}")

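For readers who want to point the updated example at one of the other configured providers, here is a minimal sketch. It reuses the models_config dictionary and the PromptNode arguments shown in the diff above; the build_prompt_node helper, the placeholder prompt text, and the choice of the "anthropic" entry are illustrative assumptions, not part of this commit.

import os
from typing import Any, Dict

from haystack.nodes import PromptNode, PromptTemplate

# Same provider registry as in examples/web_lfqa_improved.py above.
models_config: Dict[str, Any] = {
    "openai": {"api_key": os.environ.get("OPENAI_API_KEY"), "model_name": "gpt-3.5-turbo"},
    "anthropic": {"api_key": os.environ.get("ANTHROPIC_API_KEY"), "model_name": "claude-instant-1"},
    "hf": {"api_key": os.environ.get("HF_API_KEY"), "model_name": "tiiuae/falcon-7b-instruct"},
}

# Placeholder; in the real example this is the long-form QA prompt from the diff above.
prompt_text = "Paragraphs: {join(documents)} Question: {query} Answer:"


def build_prompt_node(provider: str, stream: bool = True) -> PromptNode:
    """Hypothetical helper (not in the commit): build a PromptNode for any configured provider."""
    model = models_config[provider]
    if not model["api_key"]:
        raise ValueError(f"Please set the API key environment variable for '{provider}'")
    return PromptNode(
        model["model_name"],
        default_prompt_template=PromptTemplate(prompt_text),
        api_key=model["api_key"],
        max_length=768,
        model_kwargs={"stream": stream},
    )


# Swap "anthropic" for "openai" or "hf" to exercise the other entries in models_config.
prompt_node = build_prompt_node("anthropic")

Whether a given provider actually streams tokens depends on its Haystack invocation layer, so the stream flag is best treated as a request rather than a guarantee.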