From cf1e28431a17bfe9e1684abb9488fdc9bbfd163c Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Wed, 28 Feb 2024 18:22:29 +0100 Subject: [PATCH] fix docstrings for the builder package (#7248) * fix docstrings for the builder package * remove dead test * Apply suggestions from code review Co-authored-by: Madeesh Kannan * review feedback * pylint --------- Co-authored-by: Madeesh Kannan --- .../components/builders/answer_builder.py | 101 ++++++++------- .../builders/dynamic_chat_prompt_builder.py | 118 +++++++----------- .../builders/dynamic_prompt_builder.py | 59 +++++---- .../components/builders/prompt_builder.py | 23 ++-- .../builders/test_prompt_builder.py | 9 -- 5 files changed, 149 insertions(+), 161 deletions(-) diff --git a/haystack/components/builders/answer_builder.py b/haystack/components/builders/answer_builder.py index 81dddc62b7..9be95cb1a2 100644 --- a/haystack/components/builders/answer_builder.py +++ b/haystack/components/builders/answer_builder.py @@ -11,25 +11,37 @@ @component class AnswerBuilder: """ - A component to parse the output of a Generator to `Answer` objects using regular expressions. + Takes a query and the replies a Generator returns as input and parses them into GeneratedAnswer objects. + Optionally, it also takes Documents and metadata from the Generator as inputs to enrich the GeneratedAnswer objects. + + Usage example: + ```python + from haystack.components.builders import AnswerBuilder + + builder = AnswerBuilder(pattern="Answer: (.*)") + builder.run(query="What's the answer?", replies=["This is an argument. Answer: This is the answer."]) + ``` """ def __init__(self, pattern: Optional[str] = None, reference_pattern: Optional[str] = None): """ - :param pattern: The regular expression pattern to use to extract the answer text from the generator output. - If not specified, the whole string is used as the answer. The regular expression can have at - most one capture group. 
If a capture group is present, the text matched by the capture group - is used as the answer. If no capture group is present, the whole match is used as the answer. - Examples: - `[^\\n]+$` finds "this is an answer" in a string "this is an argument.\\nthis is an answer". - `Answer: (.*)` finds "this is an answer" in a string "this is an argument. Answer: this is an answer". - Default: `None`. - :param reference_pattern: The regular expression pattern to use for parsing the document references. - We assume that references are specified as indices of the input documents and that - indices start at 1. - Example: `\\[(\\d+)\\]` finds "1" in a string "this is an answer[1]". - If not specified, no parsing is done, and all documents are referenced. - Default: `None`. + Creates an instance of the AnswerBuilder component. + + :param pattern: + The regular expression pattern to use to extract the answer text from the generator output. + If not specified, the whole string is used as the answer. The regular expression can have at + most one capture group. If a capture group is present, the text matched by the capture group + is used as the answer. If no capture group is present, the whole match is used as the answer. + Examples: + `[^\\n]+$` finds "this is an answer" in a string "this is an argument.\\nthis is an answer". + `Answer: (.*)` finds "this is an answer" in a string "this is an argument. Answer: this is an answer". + + :param reference_pattern: + The regular expression pattern to use for parsing the document references. + We assume that references are specified as indices of the input documents and that + indices start at 1. + Example: `\\[(\\d+)\\]` finds "1" in a string "this is an answer[1]". + If not specified, no parsing is done, and all documents are referenced. 
""" if pattern: AnswerBuilder._check_num_groups_in_regex(pattern) @@ -48,31 +60,34 @@ def run( reference_pattern: Optional[str] = None, ): """ - Parse the output of a Generator to `Answer` objects using regular expressions. - - :param query: The query used in the prompts for the Generator as a string. - :param replies: The output of the Generator. A list of strings. - :param meta: The metadata returned by the Generator. An optional list of dictionaries. If not specified, - the generated answer will contain no metadata. - :param documents: The documents used as input to the Generator. A list of `Document` objects. If - `documents` are specified, they are added to the `Answer` objects. - If both `documents` and `reference_pattern` are specified, the documents referenced in the - Generator output are extracted from the input documents and added to the `Answer` objects. - Default: `None`. - :param pattern: The regular expression pattern to use to extract the answer text from the generator output. - If not specified, the whole string is used as the answer. The regular expression can have at - most one capture group. If a capture group is present, the text matched by the capture group - is used as the answer. If no capture group is present, the whole match is used as the answer. - Examples: - `[^\\n]+$` finds "this is an answer" in a string "this is an argument.\\nthis is an answer". - `Answer: (.*)` finds "this is an answer" in a string "this is an argument. Answer: this is an answer". - Default: `None`. - :param reference_pattern: The regular expression pattern to use for parsing the document references. - We assume that references are specified as indices of the input documents and that - indices start at 1. - Example: `\\[(\\d+)\\]` finds "1" in a string "this is an answer[1]". - If not specified, no parsing is done, and all documents are referenced. - Default: `None`. + Turns the output of a Generator into `Answer` objects using regular expressions. 
+ + :param query: + The query used in the prompts for the Generator. + :param replies: + The output of the Generator. + :param meta: + The metadata returned by the Generator. If not specified, the generated answer will contain no metadata. + :param documents: + The documents used as input to the Generator. If `documents` are specified, they are added to the `Answer` + objects. If both `documents` and `reference_pattern` are specified, the documents referenced in the + Generator output are extracted from the input documents and added to the `Answer` objects. + :param pattern: + The regular expression pattern to use to extract the answer text from the generator output. + If not specified, the whole string is used as the answer. The regular expression can have at + most one capture group. If a capture group is present, the text matched by the capture group + is used as the answer. If no capture group is present, the whole match is used as the answer. + Examples: + `[^\\n]+$` finds "this is an answer" in a string "this is an argument.\\nthis is an answer". + `Answer: (.*)` finds "this is an answer" in a string "this is an argument. Answer: this is an answer". + :param reference_pattern: + The regular expression pattern to use for parsing the document references. + We assume that references are specified as indices of the input documents and that indices start at 1. + Example: `\\[(\\d+)\\]` finds "1" in a string "this is an answer[1]". + If not specified, no parsing is done, and all documents are referenced. + + :returns: A dictionary with the following keys: + - `answers`: The answers obtained from the output of the generator """ if not meta: meta = [{}] * len(replies) @@ -113,8 +128,10 @@ def _extract_answer_string(reply: str, pattern: Optional[str] = None) -> str: Extract the answer string from the generator output using the specified pattern. If no pattern is specified, the whole string is used as the answer. - :param replies: The output of the Generator. A string. 
- :param pattern: The regular expression pattern to use to extract the answer text from the generator output. + :param reply: + The output of the Generator. A string. + :param pattern: + The regular expression pattern to use to extract the answer text from the generator output. """ if pattern is None: return reply diff --git a/haystack/components/builders/dynamic_chat_prompt_builder.py b/haystack/components/builders/dynamic_chat_prompt_builder.py index 7f4b0de0e6..12f6d0e5fb 100644 --- a/haystack/components/builders/dynamic_chat_prompt_builder.py +++ b/haystack/components/builders/dynamic_chat_prompt_builder.py @@ -13,17 +13,11 @@ class DynamicChatPromptBuilder: """ DynamicChatPromptBuilder is designed to construct dynamic prompts from a list of `ChatMessage` instances. It - integrates with Jinja2 templating for dynamic prompt generation. - - DynamicChatPromptBuilder assumes that the last user message in the list contains a template and renders it with resolved - pipeline variables and any additional template variables provided. - - You can provide additional template variables directly to the pipeline `run` method. They are then merged with the - variables resolved from the pipeline runtime. This allows for greater flexibility and customization of the - generated prompts based on runtime conditions and user inputs. - - The following example demonstrates how to use DynamicChatPromptBuilder to generate a chat prompt: + integrates with Jinja2 templating for dynamic prompt generation. It assumes that the last user message in the list + contains a template and renders it with variables provided to the constructor. Additional template variables + can be fed into the pipeline `run` method and will be merged before rendering the template. 
+ Usage example: ```python from haystack.components.builders import DynamicChatPromptBuilder from haystack.components.generators.chat import OpenAIChatGenerator @@ -69,37 +63,27 @@ class DynamicChatPromptBuilder: 'total_tokens': 238}})]}} ``` - The primary advantage of using DynamicChatPromptBuilder is showcased in the examples provided above. - DynamicChatPromptBuilder allows dynamic customization of prompt messages without the need to reload or recreate the - pipeline for each invocation. - - In the example above, the first query asks for general information about Berlin, and the second query requests - the weather forecast for Berlin in the next few days. DynamicChatPromptBuilder efficiently handles these distinct - prompt structures by adjusting pipeline run parameters invocations, as opposed to a regular PromptBuilder, which - would require recreating or reloading the pipeline for each distinct type of query, leading to inefficiency and - potential service disruptions, especially in server environments where continuous service is vital. - Note that the weather forecast in the example above is fictional, but it can be easily connected to a weather API to provide real weather forecasts. """ def __init__(self, runtime_variables: Optional[List[str]] = None): """ - Initializes DynamicChatPromptBuilder with the provided variable names. These variable names are used to resolve - variables and their values during pipeline runtime execution. For example, if `runtime_variables` contains - `documents`, your instance of DynamicChatPromptBuilder will expect an input called `documents`. - The values associated with variables from the pipeline runtime are then injected into template placeholders - of a ChatMessage that is provided to the `run` method. - - :param runtime_variables: A list of template variable names you can use in chat prompt construction. - :type runtime_variables: Optional[List[str]] + Constructs a DynamicChatPromptBuilder component. 
+ + :param runtime_variables: + A list of template variable names you can use in chat prompt construction. For example, + if `runtime_variables` contains the string `documents`, the component will create an input called + `documents` of type `Any`. These variable names are used to resolve variables and their values during + pipeline execution. The values associated with variables from the pipeline runtime are then injected into + template placeholders of a ChatMessage that is provided to the `run` method. """ runtime_variables = runtime_variables or [] # setup inputs - run_input_slots = {"prompt_source": List[ChatMessage], "template_variables": Optional[Dict[str, Any]]} - kwargs_input_slots = {var: Optional[Any] for var in runtime_variables} - component.set_input_types(self, **run_input_slots, **kwargs_input_slots) + default_inputs = {"prompt_source": List[ChatMessage], "template_variables": Optional[Dict[str, Any]]} + additional_input_slots = {var: Optional[Any] for var in runtime_variables} + component.set_input_types(self, **default_inputs, **additional_input_slots) # setup outputs component.set_output_types(self, prompt=List[ChatMessage]) @@ -108,27 +92,21 @@ def __init__(self, runtime_variables: Optional[List[str]] = None): def run(self, prompt_source: List[ChatMessage], template_variables: Optional[Dict[str, Any]] = None, **kwargs): """ Executes the dynamic prompt building process by processing a list of `ChatMessage` instances. - The last user message is treated as a template and rendered with the resolved pipeline variables and any - additional template variables provided. - + The last user message is treated as a template and rendered with the variables provided to the constructor. You can provide additional template variables directly to this method, which are then merged with the variables - resolved from the pipeline runtime. - - :param prompt_source: A list of `ChatMessage` instances. 
We make an assumption that the last user message has - the template for the chat prompt - :type prompt_source: List[ChatMessage] - - :param template_variables: An optional dictionary of template variables. Template variables provided at - initialization are required to resolve pipeline variables, and these are additional variables users can - provide directly to this method. - :type template_variables: Optional[Dict[str, Any]] - - :param kwargs: Additional keyword arguments, typically resolved from a pipeline, which are merged with the - provided template variables. - - :return: A dictionary containing the key "prompt", which holds either the updated list of `ChatMessage` - instances or the rendered string template, forming the complete dynamic prompt. - :rtype: Dict[str, List[ChatMessage] + provided to the constructor. + + :param prompt_source: + A list of `ChatMessage` instances. We make an assumption that the last user message has + the template for the chat prompt + :param template_variables: + A dictionary of template variables. Template variables provided at initialization are required + to resolve pipeline variables, and these are additional variables users can provide directly to this method. + :param kwargs: + Additional keyword arguments, typically resolved from a pipeline, which are merged with the provided template variables. + + :returns: A dictionary with the following keys: + - `prompt`: The updated list of `ChatMessage` instances after rendering the string template. """ kwargs = kwargs or {} template_variables = template_variables or {} @@ -149,20 +127,17 @@ def _process_chat_messages(self, prompt_source: List[ChatMessage], template_vari template variables. The resulting message replaces the last user message in the list, forming a complete, templated chat prompt. - :param prompt_source: A list of `ChatMessage` instances to be processed. The last message is expected - to be from a user and is treated as a template. 
- :type prompt_source: List[ChatMessage] - - :param template_variables: A dictionary of template variables used for rendering the last user message. - :type template_variables: Dict[str, Any] - - :return: A list of `ChatMessage` instances, where the last user message has been replaced with its - templated version. - :rtype: List[ChatMessage] - - :raises ValueError: If `chat_messages` is empty or contains elements that are not instances of - `ChatMessage`. - :raises ValueError: If the last message in `chat_messages` is not from a user. + :param prompt_source: + A list of `ChatMessage` instances to be processed. The last message is expected + to be from a user and is treated as a template. + :param template_variables: + A dictionary of template variables used for rendering the last user message. + :returns: + A list of `ChatMessage` instances, where the last user message has been replaced with its templated version. + :raises ValueError: + If `prompt_source` is empty or contains elements that are not instances of `ChatMessage`. + :raises ValueError: + If the last message in `prompt_source` is not from a user. """ if not prompt_source: raise ValueError( @@ -194,11 +169,14 @@ def _validate_template(self, template_text: str, provided_variables: Set[str]): If all the required template variables are provided, returns a Jinja2 template object. Otherwise, raises a ValueError. - :param template_text: A Jinja2 template as a string. - :param provided_variables: A set of provided template variables. - :type provided_variables: Set[str] - :return: A Jinja2 template object if all the required template variables are provided. - :raises ValueError: If all the required template variables are not provided. + :param template_text: + A Jinja2 template as a string. + :param provided_variables: + A set of provided template variables. + :returns: + A Jinja2 template object if all the required template variables are provided. + :raises ValueError: + If all the required template variables are not provided. 
""" template = Template(template_text) ast = template.environment.parse(template_text) diff --git a/haystack/components/builders/dynamic_prompt_builder.py b/haystack/components/builders/dynamic_prompt_builder.py index a2801d4481..7434de0c70 100644 --- a/haystack/components/builders/dynamic_prompt_builder.py +++ b/haystack/components/builders/dynamic_prompt_builder.py @@ -14,8 +14,7 @@ class DynamicPromptBuilder: DynamicPromptBuilder is designed to construct dynamic prompts for the pipeline. Users can change the prompt template at runtime by providing a new template for each pipeline run invocation if needed. - The following example demonstrates how to use the DynamicPromptBuilder: - + Usage example: ```python from typing import List from haystack.components.builders import DynamicPromptBuilder @@ -63,7 +62,7 @@ def run(self, doc_input: str): >> 'total_tokens': 34}}]}} Note how in the example above, we can dynamically change the prompt template by providing a new template to the - run method of the pipeline. This dynamic prompt generation is in stark contrast to the static prompt generation + run method of the pipeline. This dynamic prompt generation is in contrast to the static prompt generation using `PromptBuilder`, where the prompt template is fixed for the pipeline's lifetime and cannot be changed for each pipeline run invocation. @@ -71,14 +70,14 @@ def run(self, doc_input: str): def __init__(self, runtime_variables: Optional[List[str]] = None): """ - Initializes DynamicPromptBuilder with the provided variable names. These variable names are used to resolve - variables and their values during pipeline runtime execution. For example, if `runtime_variables` contains - `documents` your instance of DynamicPromptBuilder will expect an input called `documents`. - The values associated with variables from the pipeline runtime are then injected into template placeholders - of a string template that is provided to the `run` method. 
- - :param runtime_variables: A list of template variable names you can use in chat prompt construction. - :type runtime_variables: Optional[List[str]] + Constructs a DynamicPromptBuilder component. + + :param runtime_variables: + A list of template variable names you can use in prompt construction. For example, + if `runtime_variables` contains the string `documents`, the component will create an input called + `documents` of type `Any`. These variable names are used to resolve variables and their values during + pipeline execution. The values associated with variables from the pipeline runtime are then injected into + template placeholders of a prompt text template that is provided to the `run` method. """ runtime_variables = runtime_variables or [] @@ -101,20 +100,17 @@ def run(self, prompt_source: str, template_variables: Optional[Dict[str, Any]] = render the final prompt. You can provide additional template variables directly to this method, that are then merged with the variables resolved from the pipeline runtime. - :param prompt_source: A string template. - :type prompt_source: str - - :param template_variables: An optional dictionary of template variables. Template variables provided at - initialization are required to resolve pipeline variables, and these are additional variables users can - provide directly to this method. - :type template_variables: Optional[Dict[str, Any]] - - :param kwargs: Additional keyword arguments, typically resolved from a pipeline, which are merged with the - provided template variables. - - :return: A dictionary containing the key "prompt", which holds either the updated list of `ChatMessage` - instances or the rendered string template, forming the complete dynamic prompt. - :rtype: Dict[str, Union[List[ChatMessage], str]] + :param prompt_source: + A string template. + :param template_variables: + An optional dictionary of template variables. 
Template variables provided at initialization are required + to resolve pipeline variables, and these are additional variables users can provide directly to this method. + :param kwargs: + Additional keyword arguments, typically resolved from a pipeline, which are merged with the provided + template variables. + + :returns: A dictionary with the following keys: + - `prompt`: The updated prompt text after rendering the string template. """ kwargs = kwargs or {} template_variables = template_variables or {} @@ -135,11 +131,14 @@ def _validate_template(self, template_text: str, provided_variables: Set[str]): If all the required template variables are provided, returns a Jinja2 template object. Otherwise, raises a ValueError. - :param template_text: A Jinja2 template as a string. - :param provided_variables: A set of provided template variables. - :type provided_variables: Set[str] - :return: A Jinja2 template object if all the required template variables are provided. - :raises ValueError: If all the required template variables are not provided. + :param template_text: + A Jinja2 template as a string. + :param provided_variables: + A set of provided template variables. + :returns: + A Jinja2 template object if all the required template variables are provided. + :raises ValueError: + If all the required template variables are not provided. 
""" template = Template(template_text) ast = template.environment.parse(template_text) diff --git a/haystack/components/builders/prompt_builder.py b/haystack/components/builders/prompt_builder.py index d8495d01b2..f6c51af34b 100644 --- a/haystack/components/builders/prompt_builder.py +++ b/haystack/components/builders/prompt_builder.py @@ -1,17 +1,17 @@ -from typing import Any, Dict +from typing import Any from jinja2 import Template, meta -from haystack import component, default_to_dict +from haystack import component @component class PromptBuilder: """ - PromptBuilder is a component that renders a prompt from a template string using Jinja2 engine. + PromptBuilder is a component that renders a prompt from a template string using Jinja2 templates. The template variables found in the template string are used as input types for the component and are all required. - Usage: + Usage example: ```python template = "Translate the following context to {{ target_language }}. Context: {{ snippet }}; Translation:" builder = PromptBuilder(template=template) @@ -21,10 +21,9 @@ class PromptBuilder: def __init__(self, template: str): """ - Initialize the component with a template string. + Constructs a PromptBuilder component. - :param template: Jinja2 template string, e.g. "Summarize this document: {documents}\\nSummary:" - :type template: str + :param template: A Jinja2 template string, e.g. "Summarize this document: {documents}\\nSummary:" """ self._template_string = template self.template = Template(template) @@ -33,9 +32,13 @@ def __init__(self, template: str): for var in template_variables: component.set_input_type(self, var, Any, "") - def to_dict(self) -> Dict[str, Any]: - return default_to_dict(self, template=self._template_string) - @component.output_types(prompt=str) def run(self, **kwargs): + """ + :param kwargs: + The variables that will be used to render the prompt template. 
+ + :returns: A dictionary with the following keys: + - `prompt`: The updated prompt text after rendering the prompt template. + """ return {"prompt": self.template.render(kwargs)} diff --git a/test/components/builders/test_prompt_builder.py b/test/components/builders/test_prompt_builder.py index e1c86f7bdd..e08de568cd 100644 --- a/test/components/builders/test_prompt_builder.py +++ b/test/components/builders/test_prompt_builder.py @@ -8,15 +8,6 @@ def test_init(): assert builder._template_string == "This is a {{ variable }}" -def test_to_dict(): - builder = PromptBuilder(template="This is a {{ variable }}") - res = builder.to_dict() - assert res == { - "type": "haystack.components.builders.prompt_builder.PromptBuilder", - "init_parameters": {"template": "This is a {{ variable }}"}, - } - - def test_run(): builder = PromptBuilder(template="This is a {{ variable }}") res = builder.run(variable="test")