Add Context-Only Response Synthesizer #14439

Merged · 1 commit · Jul 19, 2024
@@ -115,6 +115,7 @@ Several response synthesizers are already implemented in LlamaIndex:
  summarization purposes, but may lose detail due to truncation.
- `no_text`: Only runs the retriever to fetch the nodes that would have been sent to the LLM,
  without actually sending them. They can then be inspected by checking `response.source_nodes`.
- `context_only`: Returns the retrieved text chunks concatenated into a single string, without
  calling the LLM (see the usage sketch after this list).
- `accumulate`: Given a set of text chunks and the query, apply the query to each text
  chunk while accumulating the responses into an array. Returns a concatenated string of all
  responses. Good for when you need to run the same query separately against each text chunk.
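As a quick editorial sketch (not part of the PR) of how the new mode could be used end to end, assuming a hypothetical `data/` directory of documents and an embedding model configured in `Settings`:

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Build an index over local documents (the directory name is illustrative).
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

# "context_only" returns the retrieved chunks joined by "\n\n";
# no LLM call is made to synthesize an answer.
query_engine = index.as_query_engine(response_mode="context_only")
response = query_engine.query("What is discussed in these documents?")
print(str(response))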
llama_index/core/response_synthesizers/context_only.py (new file)
@@ -0,0 +1,30 @@
from typing import Any, Sequence

from llama_index.core.prompts.mixin import PromptDictType
from llama_index.core.response_synthesizers.base import BaseSynthesizer
from llama_index.core.types import RESPONSE_TEXT_TYPE


class ContextOnly(BaseSynthesizer):
    """Return the retrieved text chunks joined by blank lines, without calling the LLM."""

    def _get_prompts(self) -> PromptDictType:
        """Get prompts."""
        return {}

    def _update_prompts(self, prompts: PromptDictType) -> None:
        """Update prompts."""

    def get_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        # The query is ignored; the retrieved chunks are simply concatenated.
        return "\n\n".join(text_chunks)
A collaborator commented on `get_response`:
technically there is a prompt template here, this joiner could maybe be customizable?


    async def aget_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        return "\n\n".join(text_chunks)
@@ -19,6 +19,7 @@
from llama_index.core.response_synthesizers.compact_and_refine import (
    CompactAndRefine,
)
from llama_index.core.response_synthesizers.context_only import ContextOnly
from llama_index.core.response_synthesizers.generation import Generation
from llama_index.core.response_synthesizers.no_text import NoText
from llama_index.core.response_synthesizers.refine import Refine
@@ -163,10 +164,15 @@ def get_response_synthesizer(
    )
    elif response_mode == ResponseMode.NO_TEXT:
        return NoText(
            llm=llm,
            callback_manager=callback_manager,
            streaming=streaming,
            # deprecated
            service_context=service_context,
        )
    elif response_mode == ResponseMode.CONTEXT_ONLY:
        return ContextOnly(
            callback_manager=callback_manager,
            prompt_helper=prompt_helper,
            streaming=streaming,
            # deprecated
            service_context=service_context,
        )
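For reference, a brief sketch of selecting the new mode through the factory; `MockLLM` is set here only so that no real LLM needs to be configured (the synthesizer never calls it):

from llama_index.core import Settings, get_response_synthesizer
from llama_index.core.llms import MockLLM
from llama_index.core.response_synthesizers import ResponseMode

Settings.llm = MockLLM()  # avoids resolving a real LLM from Settings
synth = get_response_synthesizer(response_mode=ResponseMode.CONTEXT_ONLY)
print(synth.get_response("ignored query", ["chunk one", "chunk two"]))
# prints the two chunks separated by a blank line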
@@ -42,6 +42,9 @@ class ResponseMode(str, Enum):
    NO_TEXT = "no_text"
    """Return the retrieved context nodes, without synthesizing a final response."""

    CONTEXT_ONLY = "context_only"
    """Return the retrieved text chunks concatenated into a single string."""

    ACCUMULATE = "accumulate"
    """Synthesize a response for each text chunk, and then return the concatenation."""

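Since `ResponseMode` subclasses `str`, the new member compares equal to its string value, so either the enum member or the raw string can be passed as `response_mode`:

from llama_index.core.response_synthesizers import ResponseMode

assert ResponseMode.CONTEXT_ONLY == "context_only"
assert ResponseMode("context_only") is ResponseMode.CONTEXT_ONLY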