From 0fc2bac467b466d510adc37f1b7a1a73bf259bf8 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Wed, 30 Aug 2023 16:06:30 +0200
Subject: [PATCH 01/25] add generators module

---
 .../preview/components/generators/__init__.py |  0
 .../preview/components/generators/_helpers.py | 28 +++++++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 haystack/preview/components/generators/__init__.py
 create mode 100644 haystack/preview/components/generators/_helpers.py

diff --git a/haystack/preview/components/generators/__init__.py b/haystack/preview/components/generators/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/haystack/preview/components/generators/_helpers.py b/haystack/preview/components/generators/_helpers.py
new file mode 100644
index 0000000000..a17b40f731
--- /dev/null
+++ b/haystack/preview/components/generators/_helpers.py
@@ -0,0 +1,28 @@
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+def enforce_token_limit(prompt: str, tokenizer, max_tokens_limit: int) -> str:
+    """
+    Ensure that the length of the prompt is within the max tokens limit of the model.
+    If needed, truncate the prompt text so that it fits within the limit.
+
+    :param prompt: Prompt text to be sent to the generative model.
+    :param tokenizer: The tokenizer used to encode the prompt.
+    :param max_tokens_limit: The max tokens limit of the model.
+    :return: The prompt text that fits within the max tokens limit of the model.
+    """
+    tokens = tokenizer.encode(prompt)
+    tokens_count = len(tokens)
+    if tokens_count <= max_tokens_limit:
+        logger.warning(
+            "The prompt has been truncated from %s tokens to %s tokens so that the prompt fits within the max token "
+            "limit. Reduce the length of the prompt to prevent it from being cut off.",
+            tokens_count,
+            max_tokens_limit,
+        )
+        tokenized_payload = tokenizer.encode(prompt)
+        prompt = tokenizer.decode(tokenized_payload[:max_tokens_limit])
+    return prompt

From 7f6325c1c80e5ef588a8fe3cee5aa6414e945df8 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Wed, 30 Aug 2023 16:24:44 +0200
Subject: [PATCH 02/25] add tests for module helper

---
 .../preview/components/generators/_helpers.py |  2 +-
 .../components/generators/test_helpers.py     | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 test/preview/components/generators/test_helpers.py

diff --git a/haystack/preview/components/generators/_helpers.py b/haystack/preview/components/generators/_helpers.py
index a17b40f731..a027b4c170 100644
--- a/haystack/preview/components/generators/_helpers.py
+++ b/haystack/preview/components/generators/_helpers.py
@@ -16,7 +16,7 @@ def enforce_token_limit(prompt: str, tokenizer, max_tokens_limit: int) -> str:
     """
     tokens = tokenizer.encode(prompt)
     tokens_count = len(tokens)
-    if tokens_count <= max_tokens_limit:
+    if tokens_count > max_tokens_limit:
         logger.warning(
             "The prompt has been truncated from %s tokens to %s tokens so that the prompt fits within the max token "
            "limit. 
Reduce the length of the prompt to prevent it from being cut off.", diff --git a/test/preview/components/generators/test_helpers.py b/test/preview/components/generators/test_helpers.py new file mode 100644 index 0000000000..2c4a64a1fc --- /dev/null +++ b/test/preview/components/generators/test_helpers.py @@ -0,0 +1,18 @@ +from unittest.mock import Mock + +import pytest + +from haystack.preview.components.generators._helpers import enforce_token_limit + + +@pytest.mark.unit +def test_enforce_token_limit(caplog): + tokenizer = Mock() + tokenizer.encode = lambda text: text.split() + tokenizer.decode = lambda tokens: " ".join(tokens) + + assert enforce_token_limit("This is a test prompt.", tokenizer=tokenizer, max_tokens_limit=3) == "This is a" + assert caplog.records[0].message == ( + "The prompt has been truncated from 5 tokens to 3 tokens so that the prompt fits within the max token " + "limit. Reduce the length of the prompt to prevent it from being cut off." + ) From 47b679970ebd5e07cafd6c4c5a5113af01712565 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 30 Aug 2023 16:27:53 +0200 Subject: [PATCH 03/25] add chatgpt generator --- .../components/generators/openai/__init__.py | 0 .../components/generators/openai/_helpers.py | 172 ++++++++++++ .../components/generators/openai/chatgpt.py | 263 ++++++++++++++++++ .../components/generators/openai/errors.py | 33 +++ 4 files changed, 468 insertions(+) create mode 100644 haystack/preview/components/generators/openai/__init__.py create mode 100644 haystack/preview/components/generators/openai/_helpers.py create mode 100644 haystack/preview/components/generators/openai/chatgpt.py create mode 100644 haystack/preview/components/generators/openai/errors.py diff --git a/haystack/preview/components/generators/openai/__init__.py b/haystack/preview/components/generators/openai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py new file mode 100644 index 0000000000..2f26be3d91 --- /dev/null +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -0,0 +1,172 @@ +from typing import List, Callable, Dict, Any + +import os +import logging +import json + +import tenacity +import requests +import sseclient + +from haystack.preview.lazy_imports import LazyImport +from haystack.preview.components.generators.openai.errors import ( + OpenAIError, + OpenAIRateLimitError, + OpenAIUnauthorizedError, +) + + +with LazyImport() as tiktoken_import: + import tiktoken + + +logger = logging.getLogger(__name__) + + +OPENAI_TIMEOUT = float(os.environ.get("HAYSTACK_REMOTE_API_TIMEOUT_SEC", 30)) +OPENAI_BACKOFF = int(os.environ.get("HAYSTACK_REMOTE_API_BACKOFF_SEC", 10)) +OPENAI_MAX_RETRIES = int(os.environ.get("HAYSTACK_REMOTE_API_MAX_RETRIES", 5)) +TOKENIZERS = { + **tiktoken.model.MODEL_TO_ENCODING, + "gpt-35-turbo": "cl100k_base", # https://github.com/openai/tiktoken/pull/72 +} +TOKENIZERS_TOKEN_LIMITS = { + "gpt2": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3 + "text-davinci": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3 + "gpt3-35-turbo": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-3.5-turbo-16k": 16384, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-3": 4096, # Ref: https://platform.openai.com/docs/models/gpt-3 + "gpt-4-32k": 32768, # Ref: https://platform.openai.com/docs/models/gpt-4 + "gpt-4": 8192, # Ref: https://platform.openai.com/docs/models/gpt-4 + 
"": 2049, # Default +} + + +#: Retry on OpenAI errors +openai_retry = tenacity.retry( + reraise=True, + retry=tenacity.retry_if_exception_type(OpenAIError) + and tenacity.retry_if_not_exception_type(OpenAIUnauthorizedError), + wait=tenacity.wait_exponential(multiplier=OPENAI_BACKOFF), + stop=tenacity.stop_after_attempt(OPENAI_MAX_RETRIES), +) + + +def default_streaming_callback(token: str): + """ + Default callback function for streaming responses from OpenAI API. + Prints the tokens to stdout as soon as they are received and returns them. + """ + print(token, flush=True, end="") + return token + + +@openai_retry +def query_chat_model(url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> List[str]: + """ + Query ChatGPT without streaming the response. + + :param url: The URL to query. + :param headers: The headers to send with the request. + :param payload: The payload to send with the request. + :return: A list of strings containing the response from the OpenAI API. + """ + response = requests.request("POST", url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) + raise_for_status(response=response) + json_response = json.loads(response.text) + check_truncated_answers(result=json_response, payload=payload) + check_filtered_answers(result=json_response, payload=payload) + return [choice["message"]["content"].strip() for choice in response["choices"]] + + +@openai_retry +def query_chat_model_stream( + url: str, headers: Dict[str, str], payload: Dict[str, Any], callback: Callable, marker: str +) -> List[str]: + """ + Query ChatGPT and streams the response. Once the stream finishes, returns a list of strings just like + self._query_llm() + + :param url: The URL to query. + :param headers: The headers to send with the request. + :param payload: The payload to send with the request. + :param callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token that will be returned at the end of the streaming. + :param marker: A marker that indicates the end of the stream. It is used to determine when to stop streaming. + :return: A list of strings containing the response from the OpenAI API. + """ + response = requests.request("POST", url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) + raise_for_status(response=response) + + client = sseclient.SSEClient(response) + tokens = [] + try: + for event in client.events(): + if event.data != marker: + event_data = json.loads(event.data) + delta = event_data["choices"][0]["delta"] + token = delta["content"] if "content" in delta else None + if token: + tokens.append(callback(token, event_data=event_data["choices"])) + finally: + client.close() + return ["".join(tokens)] + + +def raise_for_status(response: requests.Response): + """ + Raises the appropriate OpenAI error in case of a bad response. + + :param response: The response returned from the OpenAI API. + :raises OpenAIError: If the response status code is not 200. 
+ """ + if response.status_code != 200: + openai_error: OpenAIError + if response.status_code == 429: + openai_error = OpenAIRateLimitError(f"API rate limit exceeded: {response.text}") + elif response.status_code == 401: + openai_error = OpenAIUnauthorizedError(f"API key is invalid: {response.text}") + else: + openai_error = OpenAIError( + f"OpenAI returned an error.\n" + f"Status code: {response.status_code}\n" + f"Response body: {response.text}", + status_code=response.status_code, + ) + raise openai_error + + +def check_truncated_answers(result: Dict[str, Any], payload: Dict[str, Any]): + """ + Check the `finish_reason` the answers returned by OpenAI completions endpoint. + If the `finish_reason` is `length`, log a warning to the user. + + :param result: The result returned from the OpenAI API. + :param payload: The payload sent to the OpenAI API. + """ + truncated_completions = sum(1 for ans in result["choices"] if ans["finish_reason"] == "length") + if truncated_completions > 0: + logger.warning( + "%s out of the %s completions have been truncated before reaching a natural stopping point. " + "Increase the max_tokens parameter to allow for longer completions.", + truncated_completions, + payload["n"], + ) + + +def check_filtered_answers(result: Dict[str, Any], payload: Dict[str, Any]): + """ + Check the `finish_reason` the answers returned by OpenAI completions endpoint. + If the `finish_reason` is `content_filter`, log a warning to the user. + + :param result: The result returned from the OpenAI API. + :param payload: The payload sent to the OpenAI API. + """ + filtered_completions = sum(1 for ans in result["choices"] if ans["finish_reason"] == "content_filter") + if filtered_completions > 0: + logger.warning( + "%s out of the %s completions have omitted content due to a flag from OpenAI content filters.", + filtered_completions, + payload["n"], + ) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py new file mode 100644 index 0000000000..8930d9f401 --- /dev/null +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -0,0 +1,263 @@ +from typing import Optional, List, Callable, Dict, Any + +import logging + +from haystack.preview.lazy_imports import LazyImport +from haystack.preview import component, default_from_dict, default_to_dict +from haystack.preview.components.generators._helpers import enforce_token_limit +from haystack.preview.components.generators.openai._helpers import ( + default_streaming_callback, + query_chat_model, + query_chat_model_stream, + TOKENIZERS, + TOKENIZERS_TOKEN_LIMITS, +) + + +with LazyImport() as tiktoken_import: + import tiktoken + + +logger = logging.getLogger(__name__) + + +@component +class ChatGPTGenerator: + """ + ChatGPT LLM Generator. + + Queries ChatGPT using OpenAI's GPT-3 ChatGPT API. Invocations are made using REST API. + See [OpenAI ChatGPT API](https://platform.openai.com/docs/guides/chat) for more details. 
+ """ + + def __init__( + self, + api_key: Optional[str] = None, + model_name: str = "gpt-3.5-turbo", + system_prompt: Optional[str] = "You are a helpful assistant.", + max_reply_tokens: Optional[int] = 500, + temperature: Optional[float] = 0.7, + top_p: Optional[float] = 1, + n: Optional[int] = 1, + stop: Optional[List[str]] = None, + presence_penalty: Optional[float] = 0, + frequency_penalty: Optional[float] = 0, + logit_bias: Optional[Dict[str, float]] = None, + moderate_content: bool = True, + stream: bool = False, + streaming_callback: Optional[Callable] = default_streaming_callback, + streaming_done_marker="[DONE]", + api_base_url: str = "https://api.openai.com/v1", + openai_organization: Optional[str] = None, + ): + """ + Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. + + :param api_key: The OpenAI API key. + :param model_name: The name or path of the underlying model. + :param system_prompt: The prompt to be prepended to the user prompt. + :param max_reply_tokens: The maximum number of tokens the output text can have. + :param temperature: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + :param n: How many completions to generate for each prompt. + :param stop: One or more sequences where the API will stop generating further tokens. + :param presence_penalty: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + :param frequency_penalty: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. + :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + :param moderate_content: If set to True, the input and generated answers are filtered for potentially + sensitive content using the [OpenAI Moderation API](https://platform.openai.com/docs/guides/moderation). + If the input or answers are flagged, an empty list is returned in place of the answers. + :param stream: If set to True, the API will stream the response. The streaming_callback parameter + is used to process the stream. If set to False, the response will be returned as a string. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token to be sent to the stream. If the callback function is not + provided, the token is printed to stdout. + :param streaming_done_marker: A marker that indicates the end of the stream. The marker is used to determine + when to stop streaming. Defaults to "[DONE]". + :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. + :param openai_organization: The OpenAI organization ID. + + See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. + """ + if not api_key: + logger.warning("OpenAI API key is missing. 
You will need to provide an API key to Pipeline.run().") + + self.api_key = api_key + self.model_name = model_name + self.system_prompt = system_prompt + + self.max_reply_tokens = max_reply_tokens + self.temperature = temperature + self.top_p = top_p + self.n = n + self.stop = stop + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.logit_bias = logit_bias + self.moderate_content = moderate_content + self.stream = stream + self.streaming_callback = streaming_callback + self.streaming_done_marker = streaming_done_marker + + self.openai_organization = openai_organization + self.api_base_url = api_base_url + + self.tokenizer = None + for model_prefix in TOKENIZERS: + if model_name.startswith(model_prefix): + self.tokenizer = tiktoken.get_encoding(TOKENIZERS[model_prefix]) + break + if not self.tokenizer: + raise ValueError(f"Tokenizer for model {model_name} not found.") + + self.max_reply_tokens_limit = None + for model_prefix in TOKENIZERS_TOKEN_LIMITS: + if model_name.startswith(model_prefix): + self.max_reply_tokens_limit = TOKENIZERS_TOKEN_LIMITS[model_prefix] + break + if not self.max_reply_tokens_limit: + raise ValueError(f"Max reply tokens limit for model {model_name} not found.") + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize this component to a dictionary. + """ + return default_to_dict( + self, + api_key=self.api_key, + model_name=self.model_name, + system_prompt=self.system_prompt, + max_reply_tokens=self.max_reply_tokens, + temperature=self.temperature, + top_p=self.top_p, + n=self.n, + stop=self.stop, + presence_penalty=self.presence_penalty, + frequency_penalty=self.frequency_penalty, + logit_bias=self.logit_bias, + moderate_content=self.moderate_content, + stream=self.stream, + # FIXME how to serialize the streaming callback? + streaming_done_marker=self.streaming_done_marker, + api_base_url=self.api_base_url, + openai_organization=self.openai_organization, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "ChatGPTGenerator": + """ + Deserialize this component from a dictionary. + """ + # FIXME how to deserialize the streaming callback? + return default_from_dict(cls, data) + + @component.output_types(replies=List[List[str]]) + def run( + self, + prompts: List[str], + api_key: str, + model_name: str = "gpt-3.5-turbo", + system_prompt: Optional[str] = "You are a helpful assistant.", + max_reply_tokens: Optional[int] = 500, + temperature: Optional[float] = 0.7, + top_p: Optional[float] = 1, + n: Optional[int] = 1, + stop: Optional[List[str]] = None, + presence_penalty: Optional[float] = 0, + frequency_penalty: Optional[float] = 0, + logit_bias: Optional[Dict[str, float]] = None, + moderate_content: bool = True, + api_base_url: str = "https://api.openai.com/v1", + openai_organization: Optional[str] = None, + stream: bool = False, + streaming_callback: Optional[Callable] = None, + streaming_done_marker: str = "[DONE]", + ): + """ + Queries the LLM with the prompts to produce replies. + + :param prompts: The prompts to be sent to the generative model. + :param api_key: The OpenAI API key. + :param model_name: The name or path of the underlying model. + :param system_prompt: The prompt to be prepended to the user prompt. + :param max_reply_tokens: The maximum number of tokens the output text can have. + :param temperature: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. 
+ :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + :param n: How many completions to generate for each prompt. + :param stop: One or more sequences where the API will stop generating further tokens. + :param presence_penalty: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + :param frequency_penalty: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. + :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + :param moderate_content: If set to True, the input and generated answers are filtered for potentially + sensitive content using the [OpenAI Moderation API](https://platform.openai.com/docs/guides/moderation). + If the input or answers are flagged, an empty list is returned in place of the answers. + :param stream: If set to True, the API will stream the response. The streaming_callback parameter + is used to process the stream. If set to False, the response will be returned as a string. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token to be sent to the stream. If the callback function is not + provided, the token is printed to stdout. + :param streaming_done_marker: A marker that indicates the end of the stream. The marker is used to determine + when to stop streaming. Defaults to "[DONE]". + :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. + :param openai_organization: The OpenAI organization ID. + + See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. + """ + if not api_key and not self.api_key: + raise ValueError("OpenAI API key is missing. 
Please provide an API key.") + + stream = stream or self.stream + parameters = { + "model": model_name or self.model_name, + "max_reply_tokens": max_reply_tokens or self.max_reply_tokens, + "temperature": temperature or self.temperature, + "top_p": top_p or self.top_p, + "n": n or self.n, + "stream": stream, + "stop": stop or self.stop, + "presence_penalty": presence_penalty or self.presence_penalty, + "frequency_penalty": frequency_penalty or self.frequency_penalty, + "logit_bias": logit_bias or self.logit_bias, + "moderate_content": moderate_content or self.moderate_content, + } + + headers = {"Authorization": f"Bearer {api_key or self.api_key}", "Content-Type": "application/json"} + if openai_organization or self.openai_organization: + headers["OpenAI-Organization"] = openai_organization or self.openai_organization + + url = f"{api_base_url or self.api_base_url}/chat/completions" + + replies = [] + streaming_callback = streaming_callback or self.streaming_callback + for prompt in prompts: + payload = { + **parameters, + "messages": enforce_token_limit( + prompt=[{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}], + tokenizer=self.tokenizer, + max_tokens_limit=self.max_reply_tokens_limit, + ), + } + if stream: + reply = query_chat_model_stream( + url=url, headers=headers, payload=payload, callback=streaming_callback, marker=streaming_done_marker + ) + else: + reply = query_chat_model(url=url, headers=headers, payload=payload) + replies.append(reply) + + return {"replies": replies} diff --git a/haystack/preview/components/generators/openai/errors.py b/haystack/preview/components/generators/openai/errors.py new file mode 100644 index 0000000000..09c117c826 --- /dev/null +++ b/haystack/preview/components/generators/openai/errors.py @@ -0,0 +1,33 @@ +from typing import Optional +from haystack.preview import ComponentError + + +class OpenAIError(ComponentError): + """Exception for issues that occur in the OpenAI APIs""" + + def __init__( + self, message: Optional[str] = None, status_code: Optional[int] = None, send_message_in_event: bool = False + ): + super().__init__(message=message, send_message_in_event=send_message_in_event) + self.status_code = status_code + + +class OpenAIRateLimitError(OpenAIError): + """ + Rate limit error for OpenAI API (status code 429) + See https://help.openai.com/en/articles/5955604-how-can-i-solve-429-too-many-requests-errors + See https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits + """ + + def __init__(self, message: Optional[str] = None, send_message_in_event: bool = False): + super().__init__(message=message, status_code=429, send_message_in_event=send_message_in_event) + + +class OpenAIUnauthorizedError(OpenAIError): + """ + Unauthorized error for OpenAI API (status code 401) + See https://platform.openai.com/docs/guides/error-codes/api-errors + """ + + def __init__(self, message: Optional[str] = None, send_message_in_event: bool = False): + super().__init__(message=message, status_code=401, send_message_in_event=send_message_in_event) From 4e8fcb38ba180c8b398b45c3d4e068453b502785 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 30 Aug 2023 17:02:10 +0200 Subject: [PATCH 04/25] add init and serialization tests --- haystack/preview/__init__.py | 2 +- .../components/generators/openai/_helpers.py | 3 +- .../components/generators/openai/chatgpt.py | 10 +- .../generators/openai/test_chatgpt.py | 302 ++++++++++++++++++ 4 files changed, 310 insertions(+), 7 deletions(-) create mode 100644 
test/preview/components/generators/openai/test_chatgpt.py diff --git a/haystack/preview/__init__.py b/haystack/preview/__init__.py index 36f7de744f..a5f9ed225c 100644 --- a/haystack/preview/__init__.py +++ b/haystack/preview/__init__.py @@ -1,4 +1,4 @@ from canals import component, Pipeline from canals.serialization import default_from_dict, default_to_dict -from canals.errors import DeserializationError +from canals.errors import DeserializationError, ComponentError from haystack.preview.dataclasses import * diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index 2f26be3d91..2663ad204c 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -33,7 +33,8 @@ TOKENIZERS_TOKEN_LIMITS = { "gpt2": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3 "text-davinci": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3 - "gpt3-35-turbo": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-35-turbo": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-3.5-turbo": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3-5 "gpt-3.5-turbo-16k": 16384, # Ref: https://platform.openai.com/docs/models/gpt-3-5 "gpt-3": 4096, # Ref: https://platform.openai.com/docs/models/gpt-3 "gpt-4-32k": 32768, # Ref: https://platform.openai.com/docs/models/gpt-4 diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 8930d9f401..5c95759ff8 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -117,13 +117,13 @@ def __init__( if not self.tokenizer: raise ValueError(f"Tokenizer for model {model_name} not found.") - self.max_reply_tokens_limit = None + self.max_tokens_limit = None for model_prefix in TOKENIZERS_TOKEN_LIMITS: if model_name.startswith(model_prefix): - self.max_reply_tokens_limit = TOKENIZERS_TOKEN_LIMITS[model_prefix] + self.max_tokens_limit = TOKENIZERS_TOKEN_LIMITS[model_prefix] break - if not self.max_reply_tokens_limit: - raise ValueError(f"Max reply tokens limit for model {model_name} not found.") + if not self.max_tokens_limit: + raise ValueError(f"Max tokens limit for model {model_name} not found.") def to_dict(self) -> Dict[str, Any]: """ @@ -249,7 +249,7 @@ def run( "messages": enforce_token_limit( prompt=[{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}], tokenizer=self.tokenizer, - max_tokens_limit=self.max_reply_tokens_limit, + max_tokens_limit=self.max_tokens_limit, ), } if stream: diff --git a/test/preview/components/generators/openai/test_chatgpt.py b/test/preview/components/generators/openai/test_chatgpt.py new file mode 100644 index 0000000000..63a1e236af --- /dev/null +++ b/test/preview/components/generators/openai/test_chatgpt.py @@ -0,0 +1,302 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator +from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback + + +class TestChatGPTGenerator: + @pytest.mark.unit + def test_init_default(self, caplog): + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + assert component.api_key is None + assert component.model_name == "gpt-3.5-turbo" + assert 
component.system_prompt == "You are a helpful assistant." + assert component.max_reply_tokens == 500 + assert component.temperature == 0.7 + assert component.top_p == 1 + assert component.n == 1 + assert component.stop is None + assert component.presence_penalty == 0 + assert component.frequency_penalty == 0 + assert component.logit_bias == None + assert component.moderate_content is True + assert component.stream is False + assert component.streaming_callback == default_streaming_callback + assert component.streaming_done_marker == "[DONE]" + assert component.api_base_url == "https://api.openai.com/v1" + assert component.openai_organization is None + assert component.max_tokens_limit == 2049 + + tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") + assert caplog.records[0].message == ( + "OpenAI API key is missing. You will need to provide an API key to Pipeline.run()." + ) + + @pytest.mark.unit + def test_init_with_parameters(self, caplog, monkeypatch): + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS", {"test-model-name": "test-encoding"} + ) + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS_TOKEN_LIMITS", {"test-model-name": 10} + ) + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + callback = lambda x: x + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="test-model-name", + system_prompt="test-system-prompt", + max_reply_tokens=20, + temperature=1, + top_p=5, + n=10, + stop=["test-stop-word"], + presence_penalty=0.5, + frequency_penalty=0.4, + logit_bias={"test-logit-bias": 0.3}, + moderate_content=False, + stream=True, + streaming_callback=callback, + streaming_done_marker="test-marker", + api_base_url="test-base-url", + openai_organization="test-orga-id", + ) + assert component.api_key == "test-api-key" + assert component.model_name == "test-model-name" + assert component.system_prompt == "test-system-prompt" + assert component.max_reply_tokens == 20 + assert component.temperature == 1 + assert component.top_p == 5 + assert component.n == 10 + assert component.stop == ["test-stop-word"] + assert component.presence_penalty == 0.5 + assert component.frequency_penalty == 0.4 + assert component.logit_bias == {"test-logit-bias": 0.3} + assert component.moderate_content is False + assert component.stream is True + assert component.streaming_callback == callback + assert component.streaming_done_marker == "test-marker" + assert component.api_base_url == "test-base-url" + assert component.openai_organization == "test-orga-id" + assert component.max_tokens_limit == 10 + + tiktoken_patch.get_encoding.assert_called_once_with("test-encoding") + assert not caplog.records + + @pytest.mark.unit + def test_to_dict_with_custom_init_parameters(self): + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": None, + "model_name": "gpt-3.5-turbo", + "system_prompt": "You are a helpful assistant.", + "max_reply_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stop": None, + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": None, + "moderate_content": True, + "stream": False, + # FIXME serialize callback? 
+ "streaming_done_marker": "[DONE]", + "api_base_url": "https://api.openai.com/v1", + "openai_organization": None, + }, + } + + @pytest.mark.unit + def test_to_dict_with_custom_init_parameters(self, monkeypatch): + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS", {"test-model-name": "test-encoding"} + ) + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS_TOKEN_LIMITS", {"test-model-name": 10} + ) + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + callback = lambda x: x + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="test-model-name", + system_prompt="test-system-prompt", + max_reply_tokens=20, + temperature=1, + top_p=5, + n=10, + stop=["test-stop-word"], + presence_penalty=0.5, + frequency_penalty=0.4, + logit_bias={"test-logit-bias": 0.3}, + moderate_content=False, + stream=True, + streaming_callback=callback, + streaming_done_marker="test-marker", + api_base_url="test-base-url", + openai_organization="test-orga-id", + ) + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "test-model-name", + "system_prompt": "test-system-prompt", + "max_reply_tokens": 20, + "temperature": 1, + "top_p": 5, + "n": 10, + "stop": ["test-stop-word"], + "presence_penalty": 0.5, + "frequency_penalty": 0.4, + "logit_bias": {"test-logit-bias": 0.3}, + "moderate_content": False, + "stream": True, + # FIXME serialize callback? + "streaming_done_marker": "test-marker", + "api_base_url": "test-base-url", + "openai_organization": "test-orga-id", + }, + } + + @pytest.mark.unit + def test_from_dict(self, monkeypatch): + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS", {"test-model-name": "test-encoding"} + ) + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS_TOKEN_LIMITS", {"test-model-name": 10} + ) + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + data = { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "test-model-name", + "system_prompt": "test-system-prompt", + "max_reply_tokens": 20, + "temperature": 1, + "top_p": 5, + "n": 10, + "stop": ["test-stop-word"], + "presence_penalty": 0.5, + "frequency_penalty": 0.4, + "logit_bias": {"test-logit-bias": 0.3}, + "moderate_content": False, + "stream": True, + # FIXME serialize callback? 
+ "streaming_done_marker": "test-marker", + "api_base_url": "test-base-url", + "openai_organization": "test-orga-id", + }, + } + component = ChatGPTGenerator.from_dict(data) + assert component.api_key == "test-api-key" + assert component.model_name == "test-model-name" + assert component.system_prompt == "test-system-prompt" + assert component.max_reply_tokens == 20 + assert component.temperature == 1 + assert component.top_p == 5 + assert component.n == 10 + assert component.stop == ["test-stop-word"] + assert component.presence_penalty == 0.5 + assert component.frequency_penalty == 0.4 + assert component.logit_bias == {"test-logit-bias": 0.3} + assert component.moderate_content is False + assert component.stream is True + assert component.streaming_callback == default_streaming_callback + assert component.streaming_done_marker == "test-marker" + assert component.api_base_url == "test-base-url" + assert component.openai_organization == "test-orga-id" + assert component.max_tokens_limit == 10 + + # @pytest.mark.unit + # @patch( + # "haystack.preview.components.embedders.sentence_transformers_document_embedder._SentenceTransformersEmbeddingBackendFactory" + # ) + # def test_warmup(self, mocked_factory): + # embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + # mocked_factory.get_embedding_backend.assert_not_called() + # embedder.warm_up() + # mocked_factory.get_embedding_backend.assert_called_once_with( + # model_name_or_path="model", device="cpu", use_auth_token=None + # ) + + # @pytest.mark.unit + # @patch( + # "haystack.preview.components.embedders.sentence_transformers_document_embedder._SentenceTransformersEmbeddingBackendFactory" + # ) + # def test_warmup_doesnt_reload(self, mocked_factory): + # embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + # mocked_factory.get_embedding_backend.assert_not_called() + # embedder.warm_up() + # embedder.warm_up() + # mocked_factory.get_embedding_backend.assert_called_once() + + # @pytest.mark.unit + # def test_run(self): + # embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + # embedder.embedding_backend = MagicMock() + # embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() + + # documents = [Document(content=f"document number {i}") for i in range(5)] + + # result = embedder.run(documents=documents) + + # assert isinstance(result["documents"], list) + # assert len(result["documents"]) == len(documents) + # for doc in result["documents"]: + # assert isinstance(doc, Document) + # assert isinstance(doc.embedding, list) + # assert isinstance(doc.embedding[0], float) + + # @pytest.mark.unit + # def test_run_wrong_input_format(self): + # embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") + + # string_input = "text" + # list_integers_input = [1, 2, 3] + + # with pytest.raises( + # TypeError, match="SentenceTransformersDocumentEmbedder expects a list of Documents as input" + # ): + # embedder.run(documents=string_input) + + # with pytest.raises( + # TypeError, match="SentenceTransformersDocumentEmbedder expects a list of Documents as input" + # ): + # embedder.run(documents=list_integers_input) + + # @pytest.mark.unit + # def test_embed_metadata(self): + # embedder = SentenceTransformersDocumentEmbedder( + # model_name_or_path="model", metadata_fields_to_embed=["meta_field"], embedding_separator="\n" + # ) + # embedder.embedding_backend = MagicMock() + + # documents = [ + # Document(content=f"document number {i}", 
metadata={"meta_field": f"meta_value {i}"}) for i in range(5) + # ] + + # embedder.run(documents=documents) + + # embedder.embedding_backend.embed.assert_called_once_with( + # [ + # "meta_value 0\ndocument number 0", + # "meta_value 1\ndocument number 1", + # "meta_value 2\ndocument number 2", + # "meta_value 3\ndocument number 3", + # "meta_value 4\ndocument number 4", + # ], + # batch_size=32, + # show_progress_bar=True, + # normalize_embeddings=False, + # ) From cbf77019d96ebb54fc9e0e57077a676aa3a3c8c5 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 30 Aug 2023 17:46:43 +0200 Subject: [PATCH 05/25] test component --- .../components/generators/openai/_helpers.py | 5 +- .../components/generators/openai/chatgpt.py | 96 +++++---- .../generators/openai/test_chatgpt.py | 193 ++++++++++-------- 3 files changed, 164 insertions(+), 130 deletions(-) diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index 2663ad204c..364b42b081 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -26,11 +26,11 @@ OPENAI_TIMEOUT = float(os.environ.get("HAYSTACK_REMOTE_API_TIMEOUT_SEC", 30)) OPENAI_BACKOFF = int(os.environ.get("HAYSTACK_REMOTE_API_BACKOFF_SEC", 10)) OPENAI_MAX_RETRIES = int(os.environ.get("HAYSTACK_REMOTE_API_MAX_RETRIES", 5)) -TOKENIZERS = { +OPENAI_TOKENIZERS = { **tiktoken.model.MODEL_TO_ENCODING, "gpt-35-turbo": "cl100k_base", # https://github.com/openai/tiktoken/pull/72 } -TOKENIZERS_TOKEN_LIMITS = { +OPENAI_TOKENIZERS_TOKEN_LIMITS = { "gpt2": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3 "text-davinci": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3 "gpt-35-turbo": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3-5 @@ -39,7 +39,6 @@ "gpt-3": 4096, # Ref: https://platform.openai.com/docs/models/gpt-3 "gpt-4-32k": 32768, # Ref: https://platform.openai.com/docs/models/gpt-4 "gpt-4": 8192, # Ref: https://platform.openai.com/docs/models/gpt-4 - "": 2049, # Default } diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 5c95759ff8..0e9bc19953 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -9,8 +9,8 @@ default_streaming_callback, query_chat_model, query_chat_model_stream, - TOKENIZERS, - TOKENIZERS_TOKEN_LIMITS, + OPENAI_TOKENIZERS, + OPENAI_TOKENIZERS_TOKEN_LIMITS, ) @@ -110,20 +110,20 @@ def __init__( self.api_base_url = api_base_url self.tokenizer = None - for model_prefix in TOKENIZERS: + for model_prefix in OPENAI_TOKENIZERS: if model_name.startswith(model_prefix): - self.tokenizer = tiktoken.get_encoding(TOKENIZERS[model_prefix]) + self.tokenizer = tiktoken.get_encoding(OPENAI_TOKENIZERS[model_prefix]) break if not self.tokenizer: - raise ValueError(f"Tokenizer for model {model_name} not found.") + raise ValueError(f"Tokenizer for model '{model_name}' not found.") self.max_tokens_limit = None - for model_prefix in TOKENIZERS_TOKEN_LIMITS: + for model_prefix in OPENAI_TOKENIZERS_TOKEN_LIMITS: if model_name.startswith(model_prefix): - self.max_tokens_limit = TOKENIZERS_TOKEN_LIMITS[model_prefix] + self.max_tokens_limit = OPENAI_TOKENIZERS_TOKEN_LIMITS[model_prefix] break if not self.max_tokens_limit: - raise ValueError(f"Max tokens limit for model {model_name} not found.") + raise ValueError(f"Max tokens limit for model 
'{model_name}' not found.") def to_dict(self) -> Dict[str, Any]: """ @@ -162,23 +162,23 @@ def from_dict(cls, data: Dict[str, Any]) -> "ChatGPTGenerator": def run( self, prompts: List[str], - api_key: str, - model_name: str = "gpt-3.5-turbo", - system_prompt: Optional[str] = "You are a helpful assistant.", - max_reply_tokens: Optional[int] = 500, - temperature: Optional[float] = 0.7, - top_p: Optional[float] = 1, - n: Optional[int] = 1, + api_key: Optional[str] = None, + model_name: Optional[str] = None, + system_prompt: Optional[str] = None, + max_reply_tokens: Optional[int] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + n: Optional[int] = None, stop: Optional[List[str]] = None, - presence_penalty: Optional[float] = 0, - frequency_penalty: Optional[float] = 0, + presence_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, logit_bias: Optional[Dict[str, float]] = None, - moderate_content: bool = True, - api_base_url: str = "https://api.openai.com/v1", + moderate_content: Optional[bool] = None, + api_base_url: Optional[str] = None, openai_organization: Optional[str] = None, - stream: bool = False, + stream: Optional[bool] = None, streaming_callback: Optional[Callable] = None, - streaming_done_marker: str = "[DONE]", + streaming_done_marker: Optional[str] = None, ): """ Queries the LLM with the prompts to produce replies. @@ -217,32 +217,48 @@ def run( See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. """ - if not api_key and not self.api_key: + api_key = api_key if api_key is not None else self.api_key + model_name = model_name if model_name is not None else self.model_name + system_prompt = system_prompt if system_prompt is not None else self.system_prompt + max_reply_tokens = max_reply_tokens if max_reply_tokens is not None else self.max_reply_tokens + temperature = temperature if temperature is not None else self.temperature + top_p = top_p if top_p is not None else self.top_p + n = n if n is not None else self.n + stop = stop if stop is not None else self.stop + presence_penalty = presence_penalty if presence_penalty is not None else self.presence_penalty + frequency_penalty = frequency_penalty if frequency_penalty is not None else self.frequency_penalty + logit_bias = logit_bias if logit_bias is not None else self.logit_bias + moderate_content = moderate_content if moderate_content is not None else self.moderate_content + stream = stream if stream is not None else self.stream + streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback + streaming_done_marker = ( + streaming_done_marker if streaming_done_marker is not None else self.streaming_done_marker + ) + api_base_url = api_base_url or self.api_base_url + openai_organization = openai_organization if openai_organization is not None else self.openai_organization + + if not api_key: raise ValueError("OpenAI API key is missing. 
Please provide an API key.") - stream = stream or self.stream parameters = { - "model": model_name or self.model_name, - "max_reply_tokens": max_reply_tokens or self.max_reply_tokens, - "temperature": temperature or self.temperature, - "top_p": top_p or self.top_p, - "n": n or self.n, + "model": model_name, + "max_reply_tokens": max_reply_tokens, + "temperature": temperature, + "top_p": top_p, + "n": n, "stream": stream, - "stop": stop or self.stop, - "presence_penalty": presence_penalty or self.presence_penalty, - "frequency_penalty": frequency_penalty or self.frequency_penalty, - "logit_bias": logit_bias or self.logit_bias, - "moderate_content": moderate_content or self.moderate_content, + "stop": stop, + "presence_penalty": presence_penalty, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "moderate_content": moderate_content, } - - headers = {"Authorization": f"Bearer {api_key or self.api_key}", "Content-Type": "application/json"} - if openai_organization or self.openai_organization: - headers["OpenAI-Organization"] = openai_organization or self.openai_organization - - url = f"{api_base_url or self.api_base_url}/chat/completions" + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + if openai_organization: + headers["OpenAI-Organization"] = openai_organization + url = f"{api_base_url}/chat/completions" replies = [] - streaming_callback = streaming_callback or self.streaming_callback for prompt in prompts: payload = { **parameters, diff --git a/test/preview/components/generators/openai/test_chatgpt.py b/test/preview/components/generators/openai/test_chatgpt.py index 63a1e236af..8d76c16b1c 100644 --- a/test/preview/components/generators/openai/test_chatgpt.py +++ b/test/preview/components/generators/openai/test_chatgpt.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import patch import pytest @@ -38,10 +38,12 @@ def test_init_default(self, caplog): @pytest.mark.unit def test_init_with_parameters(self, caplog, monkeypatch): monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS", {"test-model-name": "test-encoding"} + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", + {"test-model-name": "test-encoding"}, ) monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS_TOKEN_LIMITS", {"test-model-name": 10} + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", + {"test-model-name": 10}, ) with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: callback = lambda x: x @@ -86,6 +88,22 @@ def test_init_with_parameters(self, caplog, monkeypatch): tiktoken_patch.get_encoding.assert_called_once_with("test-encoding") assert not caplog.records + @pytest.mark.unit + def test_init_unknown_tokenizer(self): + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + with pytest.raises(ValueError, match="Tokenizer for model 'test-another-model-name' not found."): + ChatGPTGenerator(model_name="test-another-model-name") + + @pytest.mark.unit + def test_init_unknown_token_limit(self, monkeypatch): + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", + {"test-model-name": "test-encoding"}, + ) + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + with pytest.raises(ValueError, match="Max tokens limit for model 
'test-model-name' not found."): + ChatGPTGenerator(model_name="test-model-name") + @pytest.mark.unit def test_to_dict_with_custom_init_parameters(self): with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: @@ -117,10 +135,12 @@ def test_to_dict_with_custom_init_parameters(self): @pytest.mark.unit def test_to_dict_with_custom_init_parameters(self, monkeypatch): monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS", {"test-model-name": "test-encoding"} + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", + {"test-model-name": "test-encoding"}, ) monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS_TOKEN_LIMITS", {"test-model-name": 10} + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", + {"test-model-name": 10}, ) with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: callback = lambda x: x @@ -170,10 +190,12 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): @pytest.mark.unit def test_from_dict(self, monkeypatch): monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS", {"test-model-name": "test-encoding"} + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", + {"test-model-name": "test-encoding"}, ) monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.TOKENIZERS_TOKEN_LIMITS", {"test-model-name": 10} + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", + {"test-model-name": 10}, ) with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: data = { @@ -218,85 +240,82 @@ def test_from_dict(self, monkeypatch): assert component.openai_organization == "test-orga-id" assert component.max_tokens_limit == 10 - # @pytest.mark.unit - # @patch( - # "haystack.preview.components.embedders.sentence_transformers_document_embedder._SentenceTransformersEmbeddingBackendFactory" - # ) - # def test_warmup(self, mocked_factory): - # embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") - # mocked_factory.get_embedding_backend.assert_not_called() - # embedder.warm_up() - # mocked_factory.get_embedding_backend.assert_called_once_with( - # model_name_or_path="model", device="cpu", use_auth_token=None - # ) - - # @pytest.mark.unit - # @patch( - # "haystack.preview.components.embedders.sentence_transformers_document_embedder._SentenceTransformersEmbeddingBackendFactory" - # ) - # def test_warmup_doesnt_reload(self, mocked_factory): - # embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") - # mocked_factory.get_embedding_backend.assert_not_called() - # embedder.warm_up() - # embedder.warm_up() - # mocked_factory.get_embedding_backend.assert_called_once() - - # @pytest.mark.unit - # def test_run(self): - # embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") - # embedder.embedding_backend = MagicMock() - # embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() - - # documents = [Document(content=f"document number {i}") for i in range(5)] - - # result = embedder.run(documents=documents) - - # assert isinstance(result["documents"], list) - # assert len(result["documents"]) == len(documents) - # for doc in result["documents"]: - # assert isinstance(doc, Document) - # assert isinstance(doc.embedding, list) - # assert 
isinstance(doc.embedding[0], float) - - # @pytest.mark.unit - # def test_run_wrong_input_format(self): - # embedder = SentenceTransformersDocumentEmbedder(model_name_or_path="model") - - # string_input = "text" - # list_integers_input = [1, 2, 3] - - # with pytest.raises( - # TypeError, match="SentenceTransformersDocumentEmbedder expects a list of Documents as input" - # ): - # embedder.run(documents=string_input) - - # with pytest.raises( - # TypeError, match="SentenceTransformersDocumentEmbedder expects a list of Documents as input" - # ): - # embedder.run(documents=list_integers_input) - - # @pytest.mark.unit - # def test_embed_metadata(self): - # embedder = SentenceTransformersDocumentEmbedder( - # model_name_or_path="model", metadata_fields_to_embed=["meta_field"], embedding_separator="\n" - # ) - # embedder.embedding_backend = MagicMock() - - # documents = [ - # Document(content=f"document number {i}", metadata={"meta_field": f"meta_value {i}"}) for i in range(5) - # ] + @pytest.mark.unit + def test_run_no_api_key(self): + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + with pytest.raises(ValueError, match="OpenAI API key is missing. Please provide an API key."): + component.run(prompts=[]) - # embedder.run(documents=documents) + @pytest.mark.unit + def test_run(self): + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model") as query_patch: + query_patch.return_value = ["test-response"] + component = ChatGPTGenerator( + api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" + ) + results = component.run(prompts=["test-prompt"]) + assert results == {"replies": [["test-response"]]} + query_patch.assert_called_once_with( + url="test-base-url/chat/completions", + headers={ + "Authorization": f"Bearer test-api-key", + "Content-Type": "application/json", + "OpenAI-Organization": "test_orga_id", + }, + payload={ + "model": "gpt-3.5-turbo", + "max_reply_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stream": False, + "stop": None, + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": None, + "moderate_content": True, + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "test-prompt"}, + ], + }, + ) - # embedder.embedding_backend.embed.assert_called_once_with( - # [ - # "meta_value 0\ndocument number 0", - # "meta_value 1\ndocument number 1", - # "meta_value 2\ndocument number 2", - # "meta_value 3\ndocument number 3", - # "meta_value 4\ndocument number 4", - # ], - # batch_size=32, - # show_progress_bar=True, - # normalize_embeddings=False, - # ) + @pytest.mark.unit + def test_run_streaming(self): + with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model_stream") as query_patch: + query_patch.return_value = ["test-response-a", "test-response-b"] + callback = lambda x: x + "--test" + component = ChatGPTGenerator( + api_key="test-api-key", stream=True, streaming_callback=callback, streaming_done_marker="test-done" + ) + results = component.run(prompts=["test-prompt1, test-prompt2"]) + assert results == { + "replies": [["test-response-a", "test-response-b"], ["test-response-a", "test-response-b"]] + } + query_patch.assert_called_once_with( + 
url="https://api.openai.com/v1/chat/completions", + headers={"Authorization": f"Bearer test-api-key", "Content-Type": "application/json"}, + payload={ + "model": "gpt-3.5-turbo", + "max_reply_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stream": True, + "stop": None, + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": None, + "moderate_content": True, + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "test-prompt"}, + ], + }, + callback=callback, + marker="test-done", + ) From 419f615c7cfe4c3ef6c90908977a445ac845149d Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 30 Aug 2023 17:49:44 +0200 Subject: [PATCH 06/25] reno --- releasenotes/notes/generators-module-261376beb9c031cc.yaml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 releasenotes/notes/generators-module-261376beb9c031cc.yaml diff --git a/releasenotes/notes/generators-module-261376beb9c031cc.yaml b/releasenotes/notes/generators-module-261376beb9c031cc.yaml new file mode 100644 index 0000000000..0c57e6bc7b --- /dev/null +++ b/releasenotes/notes/generators-module-261376beb9c031cc.yaml @@ -0,0 +1,2 @@ +preview: + - Add generators module for LLM generator components. From 08e9c626b716a3bf960dd1626a4642e2f9e8eda9 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 30 Aug 2023 17:51:08 +0200 Subject: [PATCH 07/25] reno --- releasenotes/notes/chatgpt-generator-6f47f1f6207c05f5.yaml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 releasenotes/notes/chatgpt-generator-6f47f1f6207c05f5.yaml diff --git a/releasenotes/notes/chatgpt-generator-6f47f1f6207c05f5.yaml b/releasenotes/notes/chatgpt-generator-6f47f1f6207c05f5.yaml new file mode 100644 index 0000000000..363b1d092f --- /dev/null +++ b/releasenotes/notes/chatgpt-generator-6f47f1f6207c05f5.yaml @@ -0,0 +1,2 @@ +preview: + - Add ChatGPTGenerator component. From a984e67a35cf08a925a5f02e6f0c11133e4e877c Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 30 Aug 2023 20:28:45 +0200 Subject: [PATCH 08/25] more tests --- .../components/generators/openai/_helpers.py | 21 +- .../components/generators/openai/errors.py | 15 +- ...t_chatgpt.py => test_chatgpt_generator.py} | 30 +-- .../generators/openai/test_openai_helpers.py | 180 ++++++++++++++++++ 4 files changed, 214 insertions(+), 32 deletions(-) rename test/preview/components/generators/openai/{test_chatgpt.py => test_chatgpt_generator.py} (94%) create mode 100644 test/preview/components/generators/openai/test_openai_helpers.py diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index 364b42b081..dfa8ecf94f 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -71,12 +71,12 @@ def query_chat_model(url: str, headers: Dict[str, str], payload: Dict[str, Any]) :param payload: The payload to send with the request. :return: A list of strings containing the response from the OpenAI API. 
""" - response = requests.request("POST", url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) + response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) raise_for_status(response=response) json_response = json.loads(response.text) check_truncated_answers(result=json_response, payload=payload) check_filtered_answers(result=json_response, payload=payload) - return [choice["message"]["content"].strip() for choice in response["choices"]] + return [choice["message"]["content"].strip() for choice in json_response["choices"]] @openai_retry @@ -96,19 +96,20 @@ def query_chat_model_stream( :param marker: A marker that indicates the end of the stream. It is used to determine when to stop streaming. :return: A list of strings containing the response from the OpenAI API. """ - response = requests.request("POST", url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) + response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) raise_for_status(response=response) client = sseclient.SSEClient(response) tokens = [] try: for event in client.events(): - if event.data != marker: - event_data = json.loads(event.data) - delta = event_data["choices"][0]["delta"] - token = delta["content"] if "content" in delta else None - if token: - tokens.append(callback(token, event_data=event_data["choices"])) + if event.data == marker: + break + event_data = json.loads(event.data) + delta = event_data["choices"][0]["delta"] + token = delta["content"] if "content" in delta else None + if token: + tokens.append(callback(token, event_data=event_data["choices"])) finally: client.close() return ["".join(tokens)] @@ -121,7 +122,7 @@ def raise_for_status(response: requests.Response): :param response: The response returned from the OpenAI API. :raises OpenAIError: If the response status code is not 200. 
""" - if response.status_code != 200: + if response.status_code >= 400: openai_error: OpenAIError if response.status_code == 429: openai_error = OpenAIRateLimitError(f"API rate limit exceeded: {response.text}") diff --git a/haystack/preview/components/generators/openai/errors.py b/haystack/preview/components/generators/openai/errors.py index 09c117c826..f5c7e35e39 100644 --- a/haystack/preview/components/generators/openai/errors.py +++ b/haystack/preview/components/generators/openai/errors.py @@ -5,10 +5,9 @@ class OpenAIError(ComponentError): """Exception for issues that occur in the OpenAI APIs""" - def __init__( - self, message: Optional[str] = None, status_code: Optional[int] = None, send_message_in_event: bool = False - ): - super().__init__(message=message, send_message_in_event=send_message_in_event) + def __init__(self, message: Optional[str] = None, status_code: Optional[int] = None): + super().__init__() + self.message = message self.status_code = status_code @@ -19,8 +18,8 @@ class OpenAIRateLimitError(OpenAIError): See https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits """ - def __init__(self, message: Optional[str] = None, send_message_in_event: bool = False): - super().__init__(message=message, status_code=429, send_message_in_event=send_message_in_event) + def __init__(self, message: Optional[str] = None): + super().__init__(message=message, status_code=429) class OpenAIUnauthorizedError(OpenAIError): @@ -29,5 +28,5 @@ class OpenAIUnauthorizedError(OpenAIError): See https://platform.openai.com/docs/guides/error-codes/api-errors """ - def __init__(self, message: Optional[str] = None, send_message_in_event: bool = False): - super().__init__(message=message, status_code=401, send_message_in_event=send_message_in_event) + def __init__(self, message: Optional[str] = None): + super().__init__(message=message, status_code=401) diff --git a/test/preview/components/generators/openai/test_chatgpt.py b/test/preview/components/generators/openai/test_chatgpt_generator.py similarity index 94% rename from test/preview/components/generators/openai/test_chatgpt.py rename to test/preview/components/generators/openai/test_chatgpt_generator.py index 8d76c16b1c..f1ff3bc3de 100644 --- a/test/preview/components/generators/openai/test_chatgpt.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -1,4 +1,4 @@ -from unittest.mock import patch +from unittest.mock import patch, Mock import pytest @@ -251,13 +251,16 @@ def test_run_no_api_key(self): def test_run(self): with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model") as query_patch: - query_patch.return_value = ["test-response"] + query_patch.return_value = ["test-response", "another-response"] component = ChatGPTGenerator( api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" ) - results = component.run(prompts=["test-prompt"]) - assert results == {"replies": [["test-response"]]} - query_patch.assert_called_once_with( + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert results == { + "replies": [["test-response", "another-response"], ["test-response", "another-response"]] + } + query_patch.call_count == 2 + query_patch.assert_any_call( url="test-base-url/chat/completions", headers={ "Authorization": f"Bearer test-api-key", @@ -278,7 +281,7 @@ def test_run(self): "moderate_content": True, "messages": [ 
{"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "test-prompt"}, + {"role": "user", "content": "test-prompt-1"}, ], }, ) @@ -287,16 +290,15 @@ def test_run(self): def test_run_streaming(self): with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model_stream") as query_patch: - query_patch.return_value = ["test-response-a", "test-response-b"] - callback = lambda x: x + "--test" + query_patch.side_effect = [["test-response-a"], ["test-response-b"]] + callback = Mock() component = ChatGPTGenerator( api_key="test-api-key", stream=True, streaming_callback=callback, streaming_done_marker="test-done" ) - results = component.run(prompts=["test-prompt1, test-prompt2"]) - assert results == { - "replies": [["test-response-a", "test-response-b"], ["test-response-a", "test-response-b"]] - } - query_patch.assert_called_once_with( + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert results == {"replies": [["test-response-a"], ["test-response-b"]]} + query_patch.call_count == 2 + query_patch.assert_any_call( url="https://api.openai.com/v1/chat/completions", headers={"Authorization": f"Bearer test-api-key", "Content-Type": "application/json"}, payload={ @@ -313,7 +315,7 @@ def test_run_streaming(self): "moderate_content": True, "messages": [ {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "test-prompt"}, + {"role": "user", "content": "test-prompt-1"}, ], }, callback=callback, diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py new file mode 100644 index 0000000000..aa4f8990e0 --- /dev/null +++ b/test/preview/components/generators/openai/test_openai_helpers.py @@ -0,0 +1,180 @@ +from unittest.mock import Mock, patch +import json + +import pytest + + +from haystack.preview.components.generators.openai.errors import ( + OpenAIUnauthorizedError, + OpenAIError, + OpenAIRateLimitError, +) +from haystack.preview.components.generators.openai._helpers import ( + raise_for_status, + check_truncated_answers, + check_filtered_answers, + query_chat_model, + query_chat_model_stream, + OPENAI_TIMEOUT, + OPENAI_MAX_RETRIES, +) + + +@pytest.fixture(autouse=True) +def tenacity_wait(): + with patch("tenacity.nap.time"): + yield + + +@pytest.mark.unit +def test_raise_for_status_200(): + response = Mock() + response.status_code = 200 + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_401(): + response = Mock() + response.status_code = 401 + with pytest.raises(OpenAIUnauthorizedError): + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_429(): + response = Mock() + response.status_code = 429 + with pytest.raises(OpenAIRateLimitError): + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_500(): + response = Mock() + response.status_code = 500 + response.text = "Internal Server Error" + with pytest.raises(OpenAIError): + raise_for_status(response) + + +@pytest.mark.unit +def test_check_truncated_answers(caplog): + result = { + "choices": [ + {"finish_reason": "length"}, + {"finish_reason": "content_filter"}, + {"finish_reason": "length"}, + {"finish_reason": "stop"}, + ] + } + payload = {"n": 4} + check_truncated_answers(result, payload) + assert caplog.records[0].message == ( + "2 out of the 4 completions have been truncated 
before reaching a natural " + "stopping point. Increase the max_tokens parameter to allow for longer completions." + ) + + +@pytest.mark.unit +def test_check_truncated_answers(caplog): + result = {"choices": [{"finish_reason": "content_filter"}, {"finish_reason": "length"}, {"finish_reason": "stop"}]} + payload = {"n": 3} + check_filtered_answers(result, payload) + assert caplog.records[0].message == ( + "1 out of the 3 completions have omitted content due to a flag from " "OpenAI content filters." + ) + + +@pytest.mark.unit +def test_query_chat_model(): + with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: + response = Mock() + response.status_code = 200 + response.text = '{"choices": [{"finish_reason": "stop", "message": {"content": " Hello, how are you? "}}]}' + mock_post.return_value = response + replies = query_chat_model(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) + mock_post.assert_called_once_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + assert replies == ["Hello, how are you?"] + + +@pytest.mark.unit +def test_query_chat_model_fail(): + with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: + response = Mock() + response.status_code = 500 + mock_post.return_value = response + with pytest.raises(OpenAIError): + query_chat_model(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) + mock_post.assert_called_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + mock_post.call_count == OPENAI_MAX_RETRIES + + +@pytest.mark.unit +def test_query_chat_model_stream(): + with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: + with patch("haystack.preview.components.generators.openai._helpers.sseclient.SSEClient") as mock_sseclient: + callback = lambda token, event_data: f"|{token}|" + response = Mock() + response.status_code = 200 + + mock_sseclient.return_value.events.return_value = [ + Mock(data='{"choices": [{"delta": {"content": "Hello"}}]}'), + Mock(data='{"choices": [{"delta": {"content": ","}}]}'), + Mock(data='{"choices": [{"delta": {"content": " how"}}]}'), + Mock(data='{"choices": [{"delta": {"content": " are"}}]}'), + Mock(data='{"choices": [{"delta": {"content": " you"}}]}'), + Mock(data='{"choices": [{"delta": {"content": "?"}}]}'), + Mock(data="test-marker"), + Mock(data='{"choices": [{"delta": {"content": "discarded tokens"}}]}'), + ] + + mock_post.return_value = response + replies = query_chat_model_stream( + url="test-url", + headers={"header": "test-header"}, + payload={"param": "test-param"}, + callback=callback, + marker="test-marker", + ) + mock_post.assert_called_once_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + assert replies == ["|Hello||,|| how|| are|| you||?|"] + + +@pytest.mark.unit +def test_query_chat_model_stream_fail(): + with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: + callback = Mock() + response = Mock() + response.status_code = 500 + mock_post.return_value = response + with pytest.raises(OpenAIError): + query_chat_model_stream( + url="test-url", + headers={"header": "test-header"}, + payload={"param": "test-param"}, + callback=callback, + marker="test-marker", + 
) + mock_post.assert_called_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + mock_post.call_count == OPENAI_MAX_RETRIES From 612876add26657ecef66c9cfae6ad30ba64a71f8 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 10:45:43 +0200 Subject: [PATCH 09/25] add another test --- .../preview/components/generators/test_helpers.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/preview/components/generators/test_helpers.py b/test/preview/components/generators/test_helpers.py index 2c4a64a1fc..25139a4792 100644 --- a/test/preview/components/generators/test_helpers.py +++ b/test/preview/components/generators/test_helpers.py @@ -6,7 +6,7 @@ @pytest.mark.unit -def test_enforce_token_limit(caplog): +def test_enforce_token_limit_above_limit(caplog): tokenizer = Mock() tokenizer.encode = lambda text: text.split() tokenizer.decode = lambda tokens: " ".join(tokens) @@ -16,3 +16,16 @@ def test_enforce_token_limit(caplog): "The prompt has been truncated from 5 tokens to 3 tokens so that the prompt fits within the max token " "limit. Reduce the length of the prompt to prevent it from being cut off." ) + + +@pytest.mark.unit +def test_enforce_token_limit_below_limit(caplog): + tokenizer = Mock() + tokenizer.encode = lambda text: text.split() + tokenizer.decode = lambda tokens: " ".join(tokens) + + assert ( + enforce_token_limit("This is a test prompt.", tokenizer=tokenizer, max_tokens_limit=1000) + == "This is a test prompt." + ) + assert not caplog.records From e9c3de74862b0fb4c96bff43137c0641ec7498cb Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 11:33:39 +0200 Subject: [PATCH 10/25] chat token limit --- .../components/test_chatgpt_generator.py | 44 +++++++++++++++++++ .../preview/components/generators/_helpers.py | 44 ++++++++++++++++++- .../components/generators/openai/_helpers.py | 19 +++----- .../components/generators/openai/chatgpt.py | 18 +++++--- .../components/generators/openai/errors.py | 3 ++ test/preview/components/conftest.py | 11 +++++ .../generators/openai/test_openai_helpers.py | 6 --- .../components/generators/test_helpers.py | 39 ++++++++++++++-- 8 files changed, 155 insertions(+), 29 deletions(-) create mode 100644 e2e/preview/components/test_chatgpt_generator.py create mode 100644 test/preview/components/conftest.py diff --git a/e2e/preview/components/test_chatgpt_generator.py b/e2e/preview/components/test_chatgpt_generator.py new file mode 100644 index 0000000000..d92519fe27 --- /dev/null +++ b/e2e/preview/components/test_chatgpt_generator.py @@ -0,0 +1,44 @@ +import os +import pytest +from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator + + +@pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", +) +def test_chatgpt_generator_run(): + component = ChatGPTGenerator(api_key=os.environ.get("OPENAI_API_KEY")) + results = component.run(prompts=["What's the capital of France?", "What's the capital of Germany?"], n=1) + + assert len(results["replies"]) == 2 + assert len(results["replies"][0]) == 1 + assert "Paris" in results["replies"][0][0] + assert len(results["replies"][1]) == 1 + assert "Berlin" in results["replies"][1][0] + + +# @pytest.mark.skipif( +# not os.environ.get("OPENAI_API_KEY", None), +# reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", +# ) +# 
def test_chatgpt_generator_run_streaming(): + +# class Callback: +# def __init__(self): +# self.response = "" + +# def __call__(self, token): +# self.responses += token +# return token + +# callback = Callback() + +# component = ChatGPTGenerator( +# os.environ.get("OPENAI_API_KEY"), stream=True, streaming_callback=callback +# ) +# results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + +# assert results == {"replies": [["test-response-a"], ["test-response-b"]]} + +# assert callback.responses == "test-response-a\ntest-response-b\n" diff --git a/haystack/preview/components/generators/_helpers.py b/haystack/preview/components/generators/_helpers.py index a027b4c170..c866c726e5 100644 --- a/haystack/preview/components/generators/_helpers.py +++ b/haystack/preview/components/generators/_helpers.py @@ -1,3 +1,5 @@ +from typing import List + import logging @@ -18,11 +20,49 @@ def enforce_token_limit(prompt: str, tokenizer, max_tokens_limit: int) -> str: tokens_count = len(tokens) if tokens_count > max_tokens_limit: logger.warning( - "The prompt has been truncated from %s tokens to %s tokens so that the prompt fits within the max token " - "limit. Reduce the length of the prompt to prevent it from being cut off.", + "The prompt has been truncated from %s tokens to %s tokens to fit within the max token limit. " + "Reduce the length of the prompt to prevent it from being cut off.", tokens_count, max_tokens_limit, ) tokenized_payload = tokenizer.encode(prompt) prompt = tokenizer.decode(tokenized_payload[:max_tokens_limit]) return prompt + + +def enforce_token_limit_chat( + prompts: List[str], tokenizer, max_tokens_limit: int, tokens_per_message_overhead: int +) -> List[str]: + """ + Ensure that the length of the list of prompts is within the max tokens limit of the model. + If needed, truncate the prompts text and list so that it fits within the limit. + + :param prompts: Prompts text to be sent to the generative model. + :param tokenizer: The tokenizer used to encode the prompt. + :param max_tokens_limit: The max tokens limit of the model. + :param tokens_per_message_overhead: The number of tokens that are added to the prompt text for each message. + :return: A list of prompts that fits within the max tokens limit of the model. + """ + prompts_lens = [len(tokenizer.encode(prompt)) for prompt in prompts] + if (total_prompt_length := sum(prompts_lens) + (tokens_per_message_overhead * len(prompts))) <= max_tokens_limit: + return prompts + + logger.warning( + "The prompts have been truncated from %s tokens to %s tokens to fit within the max token limit. 
" + "Reduce the length of the prompt to prevent it from being cut off.", + total_prompt_length, + max_tokens_limit, + ) + cut_prompts = [] + cut_prompts_lens = [] + for prompt, prompt_len in zip(prompts, prompts_lens): + prompt_len = prompt_len + sum(cut_prompts_lens) + (tokens_per_message_overhead * (len(cut_prompts) + 1)) + if prompt_len <= max_tokens_limit: + cut_prompts.append(prompt) + cut_prompts_lens.append(prompt_len) + else: + remaining_tokens = ( + max_tokens_limit - sum(cut_prompts_lens) - (tokens_per_message_overhead * (len(cut_prompts) + 1)) + ) + cut_prompts.append(enforce_token_limit(prompt, tokenizer, remaining_tokens)) + return cut_prompts diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index dfa8ecf94f..fb541b2f46 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -123,19 +123,14 @@ def raise_for_status(response: requests.Response): :raises OpenAIError: If the response status code is not 200. """ if response.status_code >= 400: - openai_error: OpenAIError if response.status_code == 429: - openai_error = OpenAIRateLimitError(f"API rate limit exceeded: {response.text}") - elif response.status_code == 401: - openai_error = OpenAIUnauthorizedError(f"API key is invalid: {response.text}") - else: - openai_error = OpenAIError( - f"OpenAI returned an error.\n" - f"Status code: {response.status_code}\n" - f"Response body: {response.text}", - status_code=response.status_code, - ) - raise openai_error + raise OpenAIRateLimitError(f"API rate limit exceeded: {response.text}") + if response.status_code == 401: + raise OpenAIUnauthorizedError(f"API key is invalid: {response.text}") + raise OpenAIError( + f"OpenAI returned an error.\n" f"Status code: {response.status_code}\n" f"Response body: {response.text}", + status_code=response.status_code, + ) def check_truncated_answers(result: Dict[str, Any], payload: Dict[str, Any]): diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 0e9bc19953..63c427574d 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -4,7 +4,7 @@ from haystack.preview.lazy_imports import LazyImport from haystack.preview import component, default_from_dict, default_to_dict -from haystack.preview.components.generators._helpers import enforce_token_limit +from haystack.preview.components.generators._helpers import enforce_token_limit_chat from haystack.preview.components.generators.openai._helpers import ( default_streaming_callback, query_chat_model, @@ -21,6 +21,9 @@ logger = logging.getLogger(__name__) +TOKENS_PER_MESSAGE_OVERHEAD = 4 + + @component class ChatGPTGenerator: """ @@ -260,13 +263,16 @@ def run( replies = [] for prompt in prompts: + system_prompt, prompt = enforce_token_limit_chat( + prompts=[system_prompt, prompts[0]], + tokenizer=self.tokenizer, + max_tokens_limit=self.max_tokens_limit, + tokens_per_message_overhead=TOKENS_PER_MESSAGE_OVERHEAD, + ) + payload = { **parameters, - "messages": enforce_token_limit( - prompt=[{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}], - tokenizer=self.tokenizer, - max_tokens_limit=self.max_tokens_limit, - ), + "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}], } if stream: reply = query_chat_model_stream( diff 
--git a/haystack/preview/components/generators/openai/errors.py b/haystack/preview/components/generators/openai/errors.py index f5c7e35e39..1787b4e17a 100644 --- a/haystack/preview/components/generators/openai/errors.py +++ b/haystack/preview/components/generators/openai/errors.py @@ -10,6 +10,9 @@ def __init__(self, message: Optional[str] = None, status_code: Optional[int] = N self.message = message self.status_code = status_code + def __str__(self): + return self.message + f"(status code {self.status_code})" if self.status_code else "" + class OpenAIRateLimitError(OpenAIError): """ diff --git a/test/preview/components/conftest.py b/test/preview/components/conftest.py new file mode 100644 index 0000000000..833ea877f7 --- /dev/null +++ b/test/preview/components/conftest.py @@ -0,0 +1,11 @@ +from unittest.mock import patch +import pytest + + +@pytest.fixture(autouse=True) +def tenacity_wait(): + """ + Mocks tenacity's wait function to speed up tests. + """ + with patch("tenacity.nap.time"): + yield diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py index aa4f8990e0..67aacb3c3a 100644 --- a/test/preview/components/generators/openai/test_openai_helpers.py +++ b/test/preview/components/generators/openai/test_openai_helpers.py @@ -20,12 +20,6 @@ ) -@pytest.fixture(autouse=True) -def tenacity_wait(): - with patch("tenacity.nap.time"): - yield - - @pytest.mark.unit def test_raise_for_status_200(): response = Mock() diff --git a/test/preview/components/generators/test_helpers.py b/test/preview/components/generators/test_helpers.py index 25139a4792..ef702b042d 100644 --- a/test/preview/components/generators/test_helpers.py +++ b/test/preview/components/generators/test_helpers.py @@ -2,7 +2,7 @@ import pytest -from haystack.preview.components.generators._helpers import enforce_token_limit +from haystack.preview.components.generators._helpers import enforce_token_limit, enforce_token_limit_chat @pytest.mark.unit @@ -13,8 +13,8 @@ def test_enforce_token_limit_above_limit(caplog): assert enforce_token_limit("This is a test prompt.", tokenizer=tokenizer, max_tokens_limit=3) == "This is a" assert caplog.records[0].message == ( - "The prompt has been truncated from 5 tokens to 3 tokens so that the prompt fits within the max token " - "limit. Reduce the length of the prompt to prevent it from being cut off." + "The prompt has been truncated from 5 tokens to 3 tokens to fit within the max token limit. " + "Reduce the length of the prompt to prevent it from being cut off." ) @@ -29,3 +29,36 @@ def test_enforce_token_limit_below_limit(caplog): == "This is a test prompt." ) assert not caplog.records + + +@pytest.mark.unit +def test_enforce_token_limit_chat_above_limit(caplog): + tokenizer = Mock() + tokenizer.encode = lambda text: text.split() + tokenizer.decode = lambda tokens: " ".join(tokens) + + assert enforce_token_limit_chat( + ["System Prompt", "This is a test prompt."], + tokenizer=tokenizer, + max_tokens_limit=7, + tokens_per_message_overhead=2, + ) == ["System Prompt", "This is a"] + assert caplog.records[0].message == ( + "The prompts have been truncated from 11 tokens to 7 tokens to fit within the max token limit. " + "Reduce the length of the prompt to prevent it from being cut off." 
+ ) + + +@pytest.mark.unit +def test_enforce_token_limit_chat_below_limit(caplog): + tokenizer = Mock() + tokenizer.encode = lambda text: text.split() + tokenizer.decode = lambda tokens: " ".join(tokens) + + assert enforce_token_limit_chat( + ["System Prompt", "This is a test prompt."], + tokenizer=tokenizer, + max_tokens_limit=100, + tokens_per_message_overhead=2, + ) == ["System Prompt", "This is a test prompt."] + assert not caplog.records From 725fabef7f2b2112f5de0afb6d0a2fd68fcfb76b Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 11:41:29 +0200 Subject: [PATCH 11/25] move into openai --- .../components/generators/openai/__init__.py | 0 .../generators/{ => openai}/_helpers.py | 15 ++++++++++----- .../test_openai_helpers.py} | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) create mode 100644 haystack/preview/components/generators/openai/__init__.py rename haystack/preview/components/generators/{ => openai}/_helpers.py (63%) rename test/preview/components/generators/{test_helpers.py => openai/test_openai_helpers.py} (87%) diff --git a/haystack/preview/components/generators/openai/__init__.py b/haystack/preview/components/generators/openai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/haystack/preview/components/generators/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py similarity index 63% rename from haystack/preview/components/generators/_helpers.py rename to haystack/preview/components/generators/openai/_helpers.py index a027b4c170..946901b644 100644 --- a/haystack/preview/components/generators/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -1,10 +1,15 @@ import logging +from haystack.preview.lazy_imports import LazyImport + +with LazyImport("Run 'pip install tiktoken'") as tiktoken_import: + import tiktoken + logger = logging.getLogger(__name__) -def enforce_token_limit(prompt: str, tokenizer, max_tokens_limit: int) -> str: +def enforce_token_limit(prompt: str, tokenizer: "tiktoken.Encoding", max_tokens_limit: int) -> str: """ Ensure that the length of the prompt is within the max tokens limit of the model. If needed, truncate the prompt text so that it fits within the limit. @@ -14,15 +19,15 @@ def enforce_token_limit(prompt: str, tokenizer, max_tokens_limit: int) -> str: :param max_tokens_limit: The max tokens limit of the model. :return: The prompt text that fits within the max tokens limit of the model. """ + tiktoken_import.check() tokens = tokenizer.encode(prompt) tokens_count = len(tokens) if tokens_count > max_tokens_limit: logger.warning( - "The prompt has been truncated from %s tokens to %s tokens so that the prompt fits within the max token " - "limit. Reduce the length of the prompt to prevent it from being cut off.", + "The prompt has been truncated from %s tokens to %s tokens to fit within the max token limit. 
" + "Reduce the length of the prompt to prevent it from being cut off.", tokens_count, max_tokens_limit, ) - tokenized_payload = tokenizer.encode(prompt) - prompt = tokenizer.decode(tokenized_payload[:max_tokens_limit]) + prompt = tokenizer.decode(tokens[:max_tokens_limit]) return prompt diff --git a/test/preview/components/generators/test_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py similarity index 87% rename from test/preview/components/generators/test_helpers.py rename to test/preview/components/generators/openai/test_openai_helpers.py index 25139a4792..a0003fa323 100644 --- a/test/preview/components/generators/test_helpers.py +++ b/test/preview/components/generators/openai/test_openai_helpers.py @@ -2,7 +2,7 @@ import pytest -from haystack.preview.components.generators._helpers import enforce_token_limit +from haystack.preview.components.generators.openai._helpers import enforce_token_limit @pytest.mark.unit @@ -13,7 +13,7 @@ def test_enforce_token_limit_above_limit(caplog): assert enforce_token_limit("This is a test prompt.", tokenizer=tokenizer, max_tokens_limit=3) == "This is a" assert caplog.records[0].message == ( - "The prompt has been truncated from 5 tokens to 3 tokens so that the prompt fits within the max token " + "The prompt has been truncated from 5 tokens to 3 tokens to fit within the max token " "limit. Reduce the length of the prompt to prevent it from being cut off." ) From c3bef8feab83a3402dea9f5818ffc617e33ce9c7 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 12:13:08 +0200 Subject: [PATCH 12/25] fix test --- .../components/generators/openai/_helpers.py | 13 +++++-------- .../generators/openai/test_openai_helpers.py | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index 873721d947..f5b16c11ad 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -192,7 +192,7 @@ def enforce_token_limit(prompt: str, tokenizer: "tiktoken.Encoding", max_tokens_ def enforce_token_limit_chat( - prompts: List[str], tokenizer, max_tokens_limit: int, tokens_per_message_overhead: int + prompts: List[str], tokenizer: "tiktoken.Encoding", max_tokens_limit: int, tokens_per_message_overhead: int ) -> List[str]: """ Ensure that the length of the list of prompts is within the max tokens limit of the model. @@ -204,8 +204,8 @@ def enforce_token_limit_chat( :param tokens_per_message_overhead: The number of tokens that are added to the prompt text for each message. :return: A list of prompts that fits within the max tokens limit of the model. 
""" - prompts_lens = [len(tokenizer.encode(prompt)) for prompt in prompts] - if (total_prompt_length := sum(prompts_lens) + (tokens_per_message_overhead * len(prompts))) <= max_tokens_limit: + prompts_lens = [len(tokenizer.encode(prompt)) + tokens_per_message_overhead for prompt in prompts] + if (total_prompt_length := sum(prompts_lens)) <= max_tokens_limit: return prompts logger.warning( @@ -217,13 +217,10 @@ def enforce_token_limit_chat( cut_prompts = [] cut_prompts_lens = [] for prompt, prompt_len in zip(prompts, prompts_lens): - prompt_len = prompt_len + sum(cut_prompts_lens) + (tokens_per_message_overhead * (len(cut_prompts) + 1)) - if prompt_len <= max_tokens_limit: + if sum(cut_prompts_lens) + prompt_len <= max_tokens_limit: cut_prompts.append(prompt) cut_prompts_lens.append(prompt_len) else: - remaining_tokens = ( - max_tokens_limit - sum(cut_prompts_lens) - (tokens_per_message_overhead * (len(cut_prompts) + 1)) - ) + remaining_tokens = max_tokens_limit - sum(cut_prompts_lens) cut_prompts.append(enforce_token_limit(prompt, tokenizer, remaining_tokens)) return cut_prompts diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py index 914fa4dcf8..f338ad72a3 100644 --- a/test/preview/components/generators/openai/test_openai_helpers.py +++ b/test/preview/components/generators/openai/test_openai_helpers.py @@ -78,7 +78,7 @@ def test_check_truncated_answers(caplog): payload = {"n": 3} check_filtered_answers(result, payload) assert caplog.records[0].message == ( - "1 out of the 3 completions have omitted content due to a flag from " "OpenAI content filters." + "1 out of the 3 completions have omitted content due to a flag from OpenAI content filters." ) From c1a7696f801181178d51153aa3b623ceb1fc1b96 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 12:16:43 +0200 Subject: [PATCH 13/25] improve tests --- .../generators/openai/test_openai_helpers.py | 23 +++++-------------- test/preview/conftest.py | 13 +++++++++++ 2 files changed, 19 insertions(+), 17 deletions(-) create mode 100644 test/preview/conftest.py diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py index a0003fa323..23a66117d1 100644 --- a/test/preview/components/generators/openai/test_openai_helpers.py +++ b/test/preview/components/generators/openai/test_openai_helpers.py @@ -1,17 +1,12 @@ -from unittest.mock import Mock - import pytest from haystack.preview.components.generators.openai._helpers import enforce_token_limit @pytest.mark.unit -def test_enforce_token_limit_above_limit(caplog): - tokenizer = Mock() - tokenizer.encode = lambda text: text.split() - tokenizer.decode = lambda tokens: " ".join(tokens) - - assert enforce_token_limit("This is a test prompt.", tokenizer=tokenizer, max_tokens_limit=3) == "This is a" +def test_enforce_token_limit_above_limit(caplog, mock_tokenizer): + prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=3) + assert prompt == "This is a" assert caplog.records[0].message == ( "The prompt has been truncated from 5 tokens to 3 tokens to fit within the max token " "limit. Reduce the length of the prompt to prevent it from being cut off." 
@@ -19,13 +14,7 @@ def test_enforce_token_limit_above_limit(caplog): @pytest.mark.unit -def test_enforce_token_limit_below_limit(caplog): - tokenizer = Mock() - tokenizer.encode = lambda text: text.split() - tokenizer.decode = lambda tokens: " ".join(tokens) - - assert ( - enforce_token_limit("This is a test prompt.", tokenizer=tokenizer, max_tokens_limit=1000) - == "This is a test prompt." - ) +def test_enforce_token_limit_below_limit(caplog, mock_tokenizer): + prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=100) + assert prompt == "This is a test prompt." assert not caplog.records diff --git a/test/preview/conftest.py b/test/preview/conftest.py new file mode 100644 index 0000000000..b8abfa41a6 --- /dev/null +++ b/test/preview/conftest.py @@ -0,0 +1,13 @@ +from unittest.mock import Mock +import pytest + + +@pytest.fixture() +def mock_tokenizer(): + """ + Tokenizes the string by splitting on spaces. + """ + tokenizer = Mock() + tokenizer.encode = lambda text: text.split() + tokenizer.decode = lambda tokens: " ".join(tokens) + return tokenizer From ec809e4ed7dfd3753848486e30592bc401162ed5 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 12:42:33 +0200 Subject: [PATCH 14/25] add e2e test and small fixes --- .../components/test_chatgpt_generator.py | 42 +++++++------- .../components/generators/openai/_helpers.py | 2 +- .../components/generators/openai/chatgpt.py | 34 ++++------- .../openai/test_chatgpt_generator.py | 58 +++++++++---------- 4 files changed, 61 insertions(+), 75 deletions(-) diff --git a/e2e/preview/components/test_chatgpt_generator.py b/e2e/preview/components/test_chatgpt_generator.py index d92519fe27..a0fe8fc1cd 100644 --- a/e2e/preview/components/test_chatgpt_generator.py +++ b/e2e/preview/components/test_chatgpt_generator.py @@ -1,6 +1,6 @@ import os import pytest -from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator +from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator, default_streaming_callback @pytest.mark.skipif( @@ -18,27 +18,27 @@ def test_chatgpt_generator_run(): assert "Berlin" in results["replies"][1][0] -# @pytest.mark.skipif( -# not os.environ.get("OPENAI_API_KEY", None), -# reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", -# ) -# def test_chatgpt_generator_run_streaming(): - -# class Callback: -# def __init__(self): -# self.response = "" - -# def __call__(self, token): -# self.responses += token -# return token +@pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", +) +def test_chatgpt_generator_run_streaming(): + class Callback: + def __init__(self): + self.responses = "" -# callback = Callback() + def __call__(self, token, event_data): + self.responses += token + return token -# component = ChatGPTGenerator( -# os.environ.get("OPENAI_API_KEY"), stream=True, streaming_callback=callback -# ) -# results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + callback = Callback() + component = ChatGPTGenerator(os.environ.get("OPENAI_API_KEY"), stream=True, streaming_callback=callback) + results = component.run(prompts=["What's the capital of France?", "What's the capital of Germany?"], n=1) -# assert results == {"replies": [["test-response-a"], ["test-response-b"]]} + assert len(results["replies"]) == 2 + assert len(results["replies"][0]) == 1 + assert "Paris" in 
results["replies"][0][0] + assert len(results["replies"][1]) == 1 + assert "Berlin" in results["replies"][1][0] -# assert callback.responses == "test-response-a\ntest-response-b\n" + assert callback.responses == results["replies"][0][0] + results["replies"][1][0] diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index f5b16c11ad..fd7198c795 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -51,7 +51,7 @@ ) -def default_streaming_callback(token: str): +def default_streaming_callback(token: str, **kwargs): """ Default callback function for streaming responses from OpenAI API. Prints the tokens to stdout as soon as they are received and returns them. diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 1c17fe67c0..25afadc2e7 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -38,7 +38,7 @@ def __init__( api_key: Optional[str] = None, model_name: str = "gpt-3.5-turbo", system_prompt: Optional[str] = "You are a helpful assistant.", - max_reply_tokens: Optional[int] = 500, + max_tokens: Optional[int] = 500, temperature: Optional[float] = 0.7, top_p: Optional[float] = 1, n: Optional[int] = 1, @@ -46,7 +46,6 @@ def __init__( presence_penalty: Optional[float] = 0, frequency_penalty: Optional[float] = 0, logit_bias: Optional[Dict[str, float]] = None, - moderate_content: bool = True, stream: bool = False, streaming_callback: Optional[Callable] = default_streaming_callback, streaming_done_marker="[DONE]", @@ -59,7 +58,7 @@ def __init__( :param api_key: The OpenAI API key. :param model_name: The name or path of the underlying model. :param system_prompt: The prompt to be prepended to the user prompt. - :param max_reply_tokens: The maximum number of tokens the output text can have. + :param max_tokens: The maximum number of tokens the output text can have. :param temperature: What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model @@ -73,9 +72,6 @@ def __init__( Bigger values mean the model will be less likely to repeat the same token in the text. :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the values are the bias to add to that token. - :param moderate_content: If set to True, the input and generated answers are filtered for potentially - sensitive content using the [OpenAI Moderation API](https://platform.openai.com/docs/guides/moderation). - If the input or answers are flagged, an empty list is returned in place of the answers. :param stream: If set to True, the API will stream the response. The streaming_callback parameter is used to process the stream. If set to False, the response will be returned as a string. :param streaming_callback: A callback function that is called when a new token is received from the stream. 
@@ -96,15 +92,14 @@ def __init__( self.model_name = model_name self.system_prompt = system_prompt - self.max_reply_tokens = max_reply_tokens + self.max_tokens = max_tokens self.temperature = temperature self.top_p = top_p self.n = n - self.stop = stop + self.stop = stop or [] self.presence_penalty = presence_penalty self.frequency_penalty = frequency_penalty - self.logit_bias = logit_bias - self.moderate_content = moderate_content + self.logit_bias = logit_bias or {} self.stream = stream self.streaming_callback = streaming_callback self.streaming_done_marker = streaming_done_marker @@ -137,7 +132,7 @@ def to_dict(self) -> Dict[str, Any]: api_key=self.api_key, model_name=self.model_name, system_prompt=self.system_prompt, - max_reply_tokens=self.max_reply_tokens, + max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p, n=self.n, @@ -145,7 +140,6 @@ def to_dict(self) -> Dict[str, Any]: presence_penalty=self.presence_penalty, frequency_penalty=self.frequency_penalty, logit_bias=self.logit_bias, - moderate_content=self.moderate_content, stream=self.stream, # FIXME how to serialize the streaming callback? streaming_done_marker=self.streaming_done_marker, @@ -168,7 +162,7 @@ def run( api_key: Optional[str] = None, model_name: Optional[str] = None, system_prompt: Optional[str] = None, - max_reply_tokens: Optional[int] = None, + max_tokens: Optional[int] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, n: Optional[int] = None, @@ -176,7 +170,6 @@ def run( presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, logit_bias: Optional[Dict[str, float]] = None, - moderate_content: Optional[bool] = None, api_base_url: Optional[str] = None, openai_organization: Optional[str] = None, stream: Optional[bool] = None, @@ -190,7 +183,7 @@ def run( :param api_key: The OpenAI API key. :param model_name: The name or path of the underlying model. :param system_prompt: The prompt to be prepended to the user prompt. - :param max_reply_tokens: The maximum number of tokens the output text can have. + :param max_tokens: The maximum number of tokens the output text can have. :param temperature: What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model @@ -204,9 +197,6 @@ def run( Bigger values mean the model will be less likely to repeat the same token in the text. :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the values are the bias to add to that token. - :param moderate_content: If set to True, the input and generated answers are filtered for potentially - sensitive content using the [OpenAI Moderation API](https://platform.openai.com/docs/guides/moderation). - If the input or answers are flagged, an empty list is returned in place of the answers. :param stream: If set to True, the API will stream the response. The streaming_callback parameter is used to process the stream. If set to False, the response will be returned as a string. :param streaming_callback: A callback function that is called when a new token is received from the stream. 
@@ -223,7 +213,7 @@ def run( api_key = api_key if api_key is not None else self.api_key model_name = model_name if model_name is not None else self.model_name system_prompt = system_prompt if system_prompt is not None else self.system_prompt - max_reply_tokens = max_reply_tokens if max_reply_tokens is not None else self.max_reply_tokens + max_tokens = max_tokens if max_tokens is not None else self.max_tokens temperature = temperature if temperature is not None else self.temperature top_p = top_p if top_p is not None else self.top_p n = n if n is not None else self.n @@ -231,7 +221,6 @@ def run( presence_penalty = presence_penalty if presence_penalty is not None else self.presence_penalty frequency_penalty = frequency_penalty if frequency_penalty is not None else self.frequency_penalty logit_bias = logit_bias if logit_bias is not None else self.logit_bias - moderate_content = moderate_content if moderate_content is not None else self.moderate_content stream = stream if stream is not None else self.stream streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback streaming_done_marker = ( @@ -245,7 +234,7 @@ def run( parameters = { "model": model_name, - "max_reply_tokens": max_reply_tokens, + "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p, "n": n, @@ -254,7 +243,6 @@ def run( "presence_penalty": presence_penalty, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, - "moderate_content": moderate_content, } headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if openai_organization: @@ -264,7 +252,7 @@ def run( replies = [] for prompt in prompts: system_prompt, prompt = enforce_token_limit_chat( - prompts=[system_prompt, prompts[0]], + prompts=[system_prompt, prompt], tokenizer=self.tokenizer, max_tokens_limit=self.max_tokens_limit, tokens_per_message_overhead=TOKENS_PER_MESSAGE_OVERHEAD, diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index f1ff3bc3de..c3d2d8e1f2 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -14,15 +14,14 @@ def test_init_default(self, caplog): assert component.api_key is None assert component.model_name == "gpt-3.5-turbo" assert component.system_prompt == "You are a helpful assistant." 
- assert component.max_reply_tokens == 500 + assert component.max_tokens == 500 assert component.temperature == 0.7 assert component.top_p == 1 assert component.n == 1 - assert component.stop is None + assert component.stop == [] assert component.presence_penalty == 0 assert component.frequency_penalty == 0 - assert component.logit_bias == None - assert component.moderate_content is True + assert component.logit_bias == {} assert component.stream is False assert component.streaming_callback == default_streaming_callback assert component.streaming_done_marker == "[DONE]" @@ -51,7 +50,7 @@ def test_init_with_parameters(self, caplog, monkeypatch): api_key="test-api-key", model_name="test-model-name", system_prompt="test-system-prompt", - max_reply_tokens=20, + max_tokens=20, temperature=1, top_p=5, n=10, @@ -59,7 +58,6 @@ def test_init_with_parameters(self, caplog, monkeypatch): presence_penalty=0.5, frequency_penalty=0.4, logit_bias={"test-logit-bias": 0.3}, - moderate_content=False, stream=True, streaming_callback=callback, streaming_done_marker="test-marker", @@ -69,7 +67,7 @@ def test_init_with_parameters(self, caplog, monkeypatch): assert component.api_key == "test-api-key" assert component.model_name == "test-model-name" assert component.system_prompt == "test-system-prompt" - assert component.max_reply_tokens == 20 + assert component.max_tokens == 20 assert component.temperature == 1 assert component.top_p == 5 assert component.n == 10 @@ -77,7 +75,6 @@ def test_init_with_parameters(self, caplog, monkeypatch): assert component.presence_penalty == 0.5 assert component.frequency_penalty == 0.4 assert component.logit_bias == {"test-logit-bias": 0.3} - assert component.moderate_content is False assert component.stream is True assert component.streaming_callback == callback assert component.streaming_done_marker == "test-marker" @@ -115,7 +112,7 @@ def test_to_dict_with_custom_init_parameters(self): "api_key": None, "model_name": "gpt-3.5-turbo", "system_prompt": "You are a helpful assistant.", - "max_reply_tokens": 500, + "max_tokens": 500, "temperature": 0.7, "top_p": 1, "n": 1, @@ -123,7 +120,6 @@ def test_to_dict_with_custom_init_parameters(self): "presence_penalty": 0, "frequency_penalty": 0, "logit_bias": None, - "moderate_content": True, "stream": False, # FIXME serialize callback? "streaming_done_marker": "[DONE]", @@ -148,7 +144,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): api_key="test-api-key", model_name="test-model-name", system_prompt="test-system-prompt", - max_reply_tokens=20, + max_tokens=20, temperature=1, top_p=5, n=10, @@ -156,7 +152,6 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): presence_penalty=0.5, frequency_penalty=0.4, logit_bias={"test-logit-bias": 0.3}, - moderate_content=False, stream=True, streaming_callback=callback, streaming_done_marker="test-marker", @@ -170,7 +165,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "api_key": "test-api-key", "model_name": "test-model-name", "system_prompt": "test-system-prompt", - "max_reply_tokens": 20, + "max_tokens": 20, "temperature": 1, "top_p": 5, "n": 10, @@ -178,7 +173,6 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "presence_penalty": 0.5, "frequency_penalty": 0.4, "logit_bias": {"test-logit-bias": 0.3}, - "moderate_content": False, "stream": True, # FIXME serialize callback? 
"streaming_done_marker": "test-marker", @@ -204,7 +198,7 @@ def test_from_dict(self, monkeypatch): "api_key": "test-api-key", "model_name": "test-model-name", "system_prompt": "test-system-prompt", - "max_reply_tokens": 20, + "max_tokens": 20, "temperature": 1, "top_p": 5, "n": 10, @@ -212,7 +206,6 @@ def test_from_dict(self, monkeypatch): "presence_penalty": 0.5, "frequency_penalty": 0.4, "logit_bias": {"test-logit-bias": 0.3}, - "moderate_content": False, "stream": True, # FIXME serialize callback? "streaming_done_marker": "test-marker", @@ -224,7 +217,7 @@ def test_from_dict(self, monkeypatch): assert component.api_key == "test-api-key" assert component.model_name == "test-model-name" assert component.system_prompt == "test-system-prompt" - assert component.max_reply_tokens == 20 + assert component.max_tokens == 20 assert component.temperature == 1 assert component.top_p == 5 assert component.n == 10 @@ -232,7 +225,6 @@ def test_from_dict(self, monkeypatch): assert component.presence_penalty == 0.5 assert component.frequency_penalty == 0.4 assert component.logit_bias == {"test-logit-bias": 0.3} - assert component.moderate_content is False assert component.stream is True assert component.streaming_callback == default_streaming_callback assert component.streaming_done_marker == "test-marker" @@ -251,13 +243,19 @@ def test_run_no_api_key(self): def test_run(self): with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model") as query_patch: - query_patch.return_value = ["test-response", "another-response"] + query_patch.side_effect = lambda payload, **kwargs: [ + f"Response for {payload['messages'][1]['content']}", + f"Another Response for {payload['messages'][1]['content']}", + ] component = ChatGPTGenerator( api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" ) results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { - "replies": [["test-response", "another-response"], ["test-response", "another-response"]] + "replies": [ + [f"Response for test-prompt-1", f"Another Response for test-prompt-1"], + [f"Response for test-prompt-2", f"Another Response for test-prompt-2"], + ] } query_patch.call_count == 2 query_patch.assert_any_call( @@ -269,16 +267,15 @@ def test_run(self): }, payload={ "model": "gpt-3.5-turbo", - "max_reply_tokens": 500, + "max_tokens": 500, "temperature": 0.7, "top_p": 1, "n": 1, "stream": False, - "stop": None, + "stop": [], "presence_penalty": 0, "frequency_penalty": 0, - "logit_bias": None, - "moderate_content": True, + "logit_bias": {}, "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "test-prompt-1"}, @@ -290,29 +287,30 @@ def test_run(self): def test_run_streaming(self): with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model_stream") as query_patch: - query_patch.side_effect = [["test-response-a"], ["test-response-b"]] + query_patch.side_effect = lambda payload, **kwargs: [ + f"Response for {payload['messages'][1]['content']}" + ] callback = Mock() component = ChatGPTGenerator( api_key="test-api-key", stream=True, streaming_callback=callback, streaming_done_marker="test-done" ) results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) - assert results == {"replies": [["test-response-a"], 
["test-response-b"]]} + assert results == {"replies": [["Response for test-prompt-1"], ["Response for test-prompt-2"]]} query_patch.call_count == 2 query_patch.assert_any_call( url="https://api.openai.com/v1/chat/completions", headers={"Authorization": f"Bearer test-api-key", "Content-Type": "application/json"}, payload={ "model": "gpt-3.5-turbo", - "max_reply_tokens": 500, + "max_tokens": 500, "temperature": 0.7, "top_p": 1, "n": 1, "stream": True, - "stop": None, + "stop": [], "presence_penalty": 0, "frequency_penalty": 0, - "logit_bias": None, - "moderate_content": True, + "logit_bias": {}, "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "test-prompt-1"}, From 5d946f860173ada19924c3366aade1dd660104ee Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 12:49:37 +0200 Subject: [PATCH 15/25] linting --- .../components/test_chatgpt_generator.py | 2 +- .../components/generators/openai/_helpers.py | 5 +++-- .../components/generators/openai/chatgpt.py | 22 ++++++++++--------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/e2e/preview/components/test_chatgpt_generator.py b/e2e/preview/components/test_chatgpt_generator.py index a0fe8fc1cd..2818b56379 100644 --- a/e2e/preview/components/test_chatgpt_generator.py +++ b/e2e/preview/components/test_chatgpt_generator.py @@ -1,6 +1,6 @@ import os import pytest -from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator, default_streaming_callback +from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator @pytest.mark.skipif( diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index fd7198c795..3c72e3da06 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -215,7 +215,7 @@ def enforce_token_limit_chat( max_tokens_limit, ) cut_prompts = [] - cut_prompts_lens = [] + cut_prompts_lens: List[int] = [] for prompt, prompt_len in zip(prompts, prompts_lens): if sum(cut_prompts_lens) + prompt_len <= max_tokens_limit: cut_prompts.append(prompt) @@ -223,4 +223,5 @@ def enforce_token_limit_chat( else: remaining_tokens = max_tokens_limit - sum(cut_prompts_lens) cut_prompts.append(enforce_token_limit(prompt, tokenizer, remaining_tokens)) - return cut_prompts + break + return cut_prompts diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 25afadc2e7..7971dc4dd5 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -101,27 +101,29 @@ def __init__( self.frequency_penalty = frequency_penalty self.logit_bias = logit_bias or {} self.stream = stream - self.streaming_callback = streaming_callback + self.streaming_callback = streaming_callback or default_streaming_callback self.streaming_done_marker = streaming_done_marker self.openai_organization = openai_organization self.api_base_url = api_base_url - self.tokenizer = None - for model_prefix in OPENAI_TOKENIZERS: + tokenizer = None + for model_prefix, tokenizer_name in OPENAI_TOKENIZERS.items(): if model_name.startswith(model_prefix): - self.tokenizer = tiktoken.get_encoding(OPENAI_TOKENIZERS[model_prefix]) + tokenizer = tiktoken.get_encoding(tokenizer_name) break - if not self.tokenizer: + if not tokenizer: raise ValueError(f"Tokenizer for model '{model_name}' not 
found.") + self.tokenizer = tokenizer - self.max_tokens_limit = None - for model_prefix in OPENAI_TOKENIZERS_TOKEN_LIMITS: + max_tokens_limit = None + for model_prefix, limit in OPENAI_TOKENIZERS_TOKEN_LIMITS.items(): if model_name.startswith(model_prefix): - self.max_tokens_limit = OPENAI_TOKENIZERS_TOKEN_LIMITS[model_prefix] + max_tokens_limit = limit break - if not self.max_tokens_limit: + if not max_tokens_limit: raise ValueError(f"Max tokens limit for model '{model_name}' not found.") + self.max_tokens_limit = max_tokens_limit def to_dict(self) -> Dict[str, Any]: """ @@ -212,7 +214,7 @@ def run( """ api_key = api_key if api_key is not None else self.api_key model_name = model_name if model_name is not None else self.model_name - system_prompt = system_prompt if system_prompt is not None else self.system_prompt + system_prompt = system_prompt if system_prompt is not None else self.system_prompt or "" max_tokens = max_tokens if max_tokens is not None else self.max_tokens temperature = temperature if temperature is not None else self.temperature top_p = top_p if top_p is not None else self.top_p From aa9ce334bb47dbbc97112ea4ced3794691b3f67b Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 31 Aug 2023 14:16:03 +0200 Subject: [PATCH 16/25] Add ChatGPTGenerator example --- .../components/generators/openai/_helpers.py | 2 +- haystack/preview/examples/__init__.py | 0 haystack/preview/examples/chat_gpt_example.py | 13 +++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 haystack/preview/examples/__init__.py create mode 100644 haystack/preview/examples/chat_gpt_example.py diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index 3c72e3da06..99925d6e5e 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -95,7 +95,7 @@ def query_chat_model_stream( :param marker: A marker that indicates the end of the stream. It is used to determine when to stop streaming. :return: A list of strings containing the response from the OpenAI API. 
""" - response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) + response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT, stream=True) raise_for_status(response=response) client = sseclient.SSEClient(response) diff --git a/haystack/preview/examples/__init__.py b/haystack/preview/examples/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/haystack/preview/examples/chat_gpt_example.py b/haystack/preview/examples/chat_gpt_example.py new file mode 100644 index 0000000000..4e13f86ee3 --- /dev/null +++ b/haystack/preview/examples/chat_gpt_example.py @@ -0,0 +1,13 @@ +import os + +from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator + +stream_response = False + +llm = ChatGPTGenerator( + api_key=os.environ.get("OPENAI_API_KEY"), model_name="gpt-3.5-turbo", max_tokens=256, stream=stream_response +) + +responses = llm.run(prompts=["What is the meaning of life?"]) +if not stream_response: + print(responses) From 931005730a0ecc16a2803329fed7a37cea00894c Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 16:15:00 +0200 Subject: [PATCH 17/25] review feedback --- .../components/generators/openai/_helpers.py | 11 +++++------ .../components/generators/openai/chatgpt.py | 15 +-------------- .../generators/openai/test_chatgpt_generator.py | 15 ++------------- .../generators/openai/test_openai_helpers.py | 14 +++----------- 4 files changed, 11 insertions(+), 44 deletions(-) diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index 3c72e3da06..9b8f382220 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -30,15 +30,15 @@ "gpt-35-turbo": "cl100k_base", # https://github.com/openai/tiktoken/pull/72 } OPENAI_TOKENIZERS_TOKEN_LIMITS = { - "gpt2": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3 "text-davinci": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3 - "gpt-35-turbo": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3-5 - "gpt-3.5-turbo": 2049, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-35-turbo": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-3.5-turbo": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3-5 "gpt-3.5-turbo-16k": 16384, # Ref: https://platform.openai.com/docs/models/gpt-3-5 "gpt-3": 4096, # Ref: https://platform.openai.com/docs/models/gpt-3 "gpt-4-32k": 32768, # Ref: https://platform.openai.com/docs/models/gpt-4 "gpt-4": 8192, # Ref: https://platform.openai.com/docs/models/gpt-4 } +OPENAI_STREAMING_DONE_MARKER = "[DONE]" # Ref: https://platform.openai.com/docs/api-reference/chat/create#stream #: Retry on OpenAI errors @@ -80,7 +80,7 @@ def query_chat_model(url: str, headers: Dict[str, str], payload: Dict[str, Any]) @openai_retry def query_chat_model_stream( - url: str, headers: Dict[str, str], payload: Dict[str, Any], callback: Callable, marker: str + url: str, headers: Dict[str, str], payload: Dict[str, Any], callback: Callable ) -> List[str]: """ Query ChatGPT and streams the response. Once the stream finishes, returns a list of strings just like @@ -92,7 +92,6 @@ def query_chat_model_stream( :param callback: A callback function that is called when a new token is received from the stream. The callback function should accept two parameters: the token received from the stream and **kwargs. 
The callback function should return the token that will be returned at the end of the streaming. - :param marker: A marker that indicates the end of the stream. It is used to determine when to stop streaming. :return: A list of strings containing the response from the OpenAI API. """ response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) @@ -102,7 +101,7 @@ def query_chat_model_stream( tokens = [] try: for event in client.events(): - if event.data == marker: + if event.data == OPENAI_STREAMING_DONE_MARKER: break event_data = json.loads(event.data) delta = event_data["choices"][0]["delta"] diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 7971dc4dd5..1ee29adb78 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -48,7 +48,6 @@ def __init__( logit_bias: Optional[Dict[str, float]] = None, stream: bool = False, streaming_callback: Optional[Callable] = default_streaming_callback, - streaming_done_marker="[DONE]", api_base_url: str = "https://api.openai.com/v1", openai_organization: Optional[str] = None, ): @@ -78,8 +77,6 @@ def __init__( The callback function should accept two parameters: the token received from the stream and **kwargs. The callback function should return the token to be sent to the stream. If the callback function is not provided, the token is printed to stdout. - :param streaming_done_marker: A marker that indicates the end of the stream. The marker is used to determine - when to stop streaming. Defaults to "[DONE]". :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. :param openai_organization: The OpenAI organization ID. @@ -102,7 +99,6 @@ def __init__( self.logit_bias = logit_bias or {} self.stream = stream self.streaming_callback = streaming_callback or default_streaming_callback - self.streaming_done_marker = streaming_done_marker self.openai_organization = openai_organization self.api_base_url = api_base_url @@ -144,7 +140,6 @@ def to_dict(self) -> Dict[str, Any]: logit_bias=self.logit_bias, stream=self.stream, # FIXME how to serialize the streaming callback? - streaming_done_marker=self.streaming_done_marker, api_base_url=self.api_base_url, openai_organization=self.openai_organization, ) @@ -176,7 +171,6 @@ def run( openai_organization: Optional[str] = None, stream: Optional[bool] = None, streaming_callback: Optional[Callable] = None, - streaming_done_marker: Optional[str] = None, ): """ Queries the LLM with the prompts to produce replies. @@ -205,8 +199,6 @@ def run( The callback function should accept two parameters: the token received from the stream and **kwargs. The callback function should return the token to be sent to the stream. If the callback function is not provided, the token is printed to stdout. - :param streaming_done_marker: A marker that indicates the end of the stream. The marker is used to determine - when to stop streaming. Defaults to "[DONE]". :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. :param openai_organization: The OpenAI organization ID. 
@@ -225,9 +217,6 @@ def run( logit_bias = logit_bias if logit_bias is not None else self.logit_bias stream = stream if stream is not None else self.stream streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback - streaming_done_marker = ( - streaming_done_marker if streaming_done_marker is not None else self.streaming_done_marker - ) api_base_url = api_base_url or self.api_base_url openai_organization = openai_organization if openai_organization is not None else self.openai_organization @@ -265,9 +254,7 @@ def run( "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}], } if stream: - reply = query_chat_model_stream( - url=url, headers=headers, payload=payload, callback=streaming_callback, marker=streaming_done_marker - ) + reply = query_chat_model_stream(url=url, headers=headers, payload=payload, callback=streaming_callback) else: reply = query_chat_model(url=url, headers=headers, payload=payload) replies.append(reply) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index c3d2d8e1f2..a8b0ef02a2 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -24,10 +24,9 @@ def test_init_default(self, caplog): assert component.logit_bias == {} assert component.stream is False assert component.streaming_callback == default_streaming_callback - assert component.streaming_done_marker == "[DONE]" assert component.api_base_url == "https://api.openai.com/v1" assert component.openai_organization is None - assert component.max_tokens_limit == 2049 + assert component.max_tokens_limit == 4097 tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") assert caplog.records[0].message == ( @@ -60,7 +59,6 @@ def test_init_with_parameters(self, caplog, monkeypatch): logit_bias={"test-logit-bias": 0.3}, stream=True, streaming_callback=callback, - streaming_done_marker="test-marker", api_base_url="test-base-url", openai_organization="test-orga-id", ) @@ -77,7 +75,6 @@ def test_init_with_parameters(self, caplog, monkeypatch): assert component.logit_bias == {"test-logit-bias": 0.3} assert component.stream is True assert component.streaming_callback == callback - assert component.streaming_done_marker == "test-marker" assert component.api_base_url == "test-base-url" assert component.openai_organization == "test-orga-id" assert component.max_tokens_limit == 10 @@ -122,7 +119,6 @@ def test_to_dict_with_custom_init_parameters(self): "logit_bias": None, "stream": False, # FIXME serialize callback? - "streaming_done_marker": "[DONE]", "api_base_url": "https://api.openai.com/v1", "openai_organization": None, }, @@ -154,7 +150,6 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): logit_bias={"test-logit-bias": 0.3}, stream=True, streaming_callback=callback, - streaming_done_marker="test-marker", api_base_url="test-base-url", openai_organization="test-orga-id", ) @@ -175,7 +170,6 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "logit_bias": {"test-logit-bias": 0.3}, "stream": True, # FIXME serialize callback? - "streaming_done_marker": "test-marker", "api_base_url": "test-base-url", "openai_organization": "test-orga-id", }, @@ -208,7 +202,6 @@ def test_from_dict(self, monkeypatch): "logit_bias": {"test-logit-bias": 0.3}, "stream": True, # FIXME serialize callback? 
- "streaming_done_marker": "test-marker", "api_base_url": "test-base-url", "openai_organization": "test-orga-id", }, @@ -227,7 +220,6 @@ def test_from_dict(self, monkeypatch): assert component.logit_bias == {"test-logit-bias": 0.3} assert component.stream is True assert component.streaming_callback == default_streaming_callback - assert component.streaming_done_marker == "test-marker" assert component.api_base_url == "test-base-url" assert component.openai_organization == "test-orga-id" assert component.max_tokens_limit == 10 @@ -291,9 +283,7 @@ def test_run_streaming(self): f"Response for {payload['messages'][1]['content']}" ] callback = Mock() - component = ChatGPTGenerator( - api_key="test-api-key", stream=True, streaming_callback=callback, streaming_done_marker="test-done" - ) + component = ChatGPTGenerator(api_key="test-api-key", stream=True, streaming_callback=callback) results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == {"replies": [["Response for test-prompt-1"], ["Response for test-prompt-2"]]} query_patch.call_count == 2 @@ -317,5 +307,4 @@ def test_run_streaming(self): ], }, callback=callback, - marker="test-done", ) diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py index e23613d817..bd9689408a 100644 --- a/test/preview/components/generators/openai/test_openai_helpers.py +++ b/test/preview/components/generators/openai/test_openai_helpers.py @@ -131,17 +131,13 @@ def test_query_chat_model_stream(): Mock(data='{"choices": [{"delta": {"content": " are"}}]}'), Mock(data='{"choices": [{"delta": {"content": " you"}}]}'), Mock(data='{"choices": [{"delta": {"content": "?"}}]}'), - Mock(data="test-marker"), + Mock(data="[DONE]"), Mock(data='{"choices": [{"delta": {"content": "discarded tokens"}}]}'), ] mock_post.return_value = response replies = query_chat_model_stream( - url="test-url", - headers={"header": "test-header"}, - payload={"param": "test-param"}, - callback=callback, - marker="test-marker", + url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback ) mock_post.assert_called_once_with( "test-url", @@ -161,11 +157,7 @@ def test_query_chat_model_stream_fail(): mock_post.return_value = response with pytest.raises(OpenAIError): query_chat_model_stream( - url="test-url", - headers={"header": "test-header"}, - payload={"param": "test-param"}, - callback=callback, - marker="test-marker", + url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback ) mock_post.assert_called_with( "test-url", From b2e421dc02d75f15bb028797244538b92cfa2b66 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 17:31:17 +0200 Subject: [PATCH 18/25] support for metadata --- .../components/test_chatgpt_generator.py | 16 +++++ .../components/generators/openai/_helpers.py | 32 +++++++-- .../components/generators/openai/chatgpt.py | 12 ++-- .../openai/test_chatgpt_generator.py | 30 ++++++--- .../generators/openai/test_openai_helpers.py | 65 ++++++++++++++++--- 5 files changed, 128 insertions(+), 27 deletions(-) diff --git a/e2e/preview/components/test_chatgpt_generator.py b/e2e/preview/components/test_chatgpt_generator.py index 2818b56379..ae243d1a4b 100644 --- a/e2e/preview/components/test_chatgpt_generator.py +++ b/e2e/preview/components/test_chatgpt_generator.py @@ -17,6 +17,14 @@ def test_chatgpt_generator_run(): assert len(results["replies"][1]) == 1 assert "Berlin" in 
results["replies"][1][0] + assert len(results["metadata"]) == 2 + assert len(results["metadata"][0]) == 1 + assert "gpt-3.5-turbo" in results["metadata"][0][0]["model"] + assert "stop" == results["metadata"][0][0]["finish_reason"] + assert len(results["metadata"][1]) == 1 + assert "gpt-3.5-turbo" in results["metadata"][1][0]["model"] + assert "stop" == results["metadata"][1][0]["finish_reason"] + @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), @@ -42,3 +50,11 @@ def __call__(self, token, event_data): assert "Berlin" in results["replies"][1][0] assert callback.responses == results["replies"][0][0] + results["replies"][1][0] + + assert len(results["metadata"]) == 2 + assert len(results["metadata"][0]) == 1 + assert "gpt-3.5-turbo" in results["metadata"][0][0]["model"] + assert "stop" == results["metadata"][0][0]["finish_reason"] + assert len(results["metadata"][1]) == 1 + assert "gpt-3.5-turbo" in results["metadata"][1][0]["model"] + assert "stop" == results["metadata"][1][0]["finish_reason"] diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index a07f1e11a4..c25703de8d 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -1,4 +1,4 @@ -from typing import List, Callable, Dict, Any +from typing import List, Callable, Dict, Any, Tuple import os import logging import json @@ -75,13 +75,22 @@ def query_chat_model(url: str, headers: Dict[str, str], payload: Dict[str, Any]) json_response = json.loads(response.text) check_truncated_answers(result=json_response, payload=payload) check_filtered_answers(result=json_response, payload=payload) - return [choice["message"]["content"].strip() for choice in json_response["choices"]] + metadata = [ + { + "model": json_response.get("model", None), + "index": choice.get("index", None), + "finish_reason": choice.get("finish_reason", None), + **json_response.get("usage", {}), + } + for choice in json_response.get("choices", []) + ] + return [choice["message"]["content"].strip() for choice in json_response.get("choices", [])], metadata @openai_retry def query_chat_model_stream( url: str, headers: Dict[str, str], payload: Dict[str, Any], callback: Callable -) -> List[str]: +) -> Tuple[List[str], List[Dict[str, Any]]]: """ Query ChatGPT and streams the response. 
Once the stream finishes, returns a list of strings just like self._query_llm() @@ -98,6 +107,7 @@ def query_chat_model_stream( raise_for_status(response=response) client = sseclient.SSEClient(response) + event_data = None tokens = [] try: for event in client.events(): @@ -110,7 +120,21 @@ def query_chat_model_stream( tokens.append(callback(token, event_data=event_data["choices"])) finally: client.close() - return ["".join(tokens)] + + metadata = ( + [ + { + "model": event_data.get("model", None), + "index": choice.get("index", None), + "finish_reason": choice.get("finish_reason", None), + } + for choice in event_data.get("choices", []) + ] + if event_data + else [] + ) + + return ["".join(tokens)], metadata def raise_for_status(response: requests.Response): diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 1ee29adb78..1ad180e2c6 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -152,7 +152,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "ChatGPTGenerator": # FIXME how to deserialize the streaming callback? return default_from_dict(cls, data) - @component.output_types(replies=List[List[str]]) + @component.output_types(replies=List[List[str]], metadata=List[Dict[str, Any]]) def run( self, prompts: List[str], @@ -241,6 +241,7 @@ def run( url = f"{api_base_url}/chat/completions" replies = [] + metadata = [] for prompt in prompts: system_prompt, prompt = enforce_token_limit_chat( prompts=[system_prompt, prompt], @@ -254,9 +255,12 @@ def run( "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}], } if stream: - reply = query_chat_model_stream(url=url, headers=headers, payload=payload, callback=streaming_callback) + reply, meta = query_chat_model_stream( + url=url, headers=headers, payload=payload, callback=streaming_callback + ) else: - reply = query_chat_model(url=url, headers=headers, payload=payload) + reply, meta = query_chat_model(url=url, headers=headers, payload=payload) replies.append(reply) + metadata.append(meta) - return {"replies": replies} + return {"replies": replies, "metadata": metadata} diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index a8b0ef02a2..362e6b0971 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -235,19 +235,25 @@ def test_run_no_api_key(self): def test_run(self): with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model") as query_patch: - query_patch.side_effect = lambda payload, **kwargs: [ - f"Response for {payload['messages'][1]['content']}", - f"Another Response for {payload['messages'][1]['content']}", - ] + query_patch.side_effect = lambda payload, **kwargs: ( + [ + f"Response for {payload['messages'][1]['content']}", + f"Another Response for {payload['messages'][1]['content']}", + ], + [{"metadata of": payload["messages"][1]["content"]}], + ) component = ChatGPTGenerator( api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" ) + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert results == { "replies": [ [f"Response for test-prompt-1", f"Another Response for 
test-prompt-1"], [f"Response for test-prompt-2", f"Another Response for test-prompt-2"], - ] + ], + "metadata": [[{"metadata of": "test-prompt-1"}], [{"metadata of": "test-prompt-2"}]], } query_patch.call_count == 2 query_patch.assert_any_call( @@ -279,13 +285,19 @@ def test_run(self): def test_run_streaming(self): with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model_stream") as query_patch: - query_patch.side_effect = lambda payload, **kwargs: [ - f"Response for {payload['messages'][1]['content']}" - ] + query_patch.side_effect = lambda payload, **kwargs: ( + [f"Response for {payload['messages'][1]['content']}"], + [{"metadata of": payload["messages"][1]["content"]}], + ) callback = Mock() component = ChatGPTGenerator(api_key="test-api-key", stream=True, streaming_callback=callback) + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) - assert results == {"replies": [["Response for test-prompt-1"], ["Response for test-prompt-2"]]} + + assert results == { + "replies": [["Response for test-prompt-1"], ["Response for test-prompt-2"]], + "metadata": [[{"metadata of": "test-prompt-1"}], [{"metadata of": "test-prompt-2"}]], + } query_patch.call_count == 2 query_patch.assert_any_call( url="https://api.openai.com/v1/chat/completions", diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py index bd9689408a..b0a6de2774 100644 --- a/test/preview/components/generators/openai/test_openai_helpers.py +++ b/test/preview/components/generators/openai/test_openai_helpers.py @@ -87,9 +87,27 @@ def test_query_chat_model(): with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: response = Mock() response.status_code = 200 - response.text = '{"choices": [{"finish_reason": "stop", "message": {"content": " Hello, how are you? "}}]}' + response.text = """ + { + "model": "test-model", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": {"content": " Hello, how are you? 
"} + } + ], + "usage": { + "prompt_tokens": 4, + "completion_tokens": 5, + "total_tokens": 9 + } + + }""" mock_post.return_value = response - replies = query_chat_model(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) + replies, metadata = query_chat_model( + url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"} + ) mock_post.assert_called_once_with( "test-url", headers={"header": "test-header"}, @@ -97,6 +115,16 @@ def test_query_chat_model(): timeout=OPENAI_TIMEOUT, ) assert replies == ["Hello, how are you?"] + assert metadata == [ + { + "model": "test-model", + "index": 0, + "finish_reason": "stop", + "prompt_tokens": 4, + "completion_tokens": 5, + "total_tokens": 9, + } + ] @pytest.mark.unit @@ -116,6 +144,21 @@ def test_query_chat_model_fail(): mock_post.call_count == OPENAI_MAX_RETRIES +def mock_chat_completion_stream(model="test-model", index=0, token="test", finish_reason="stop"): + return Mock( + data=f"""{{ + "model": "{model}", + "choices": [ + {{ + "index": {index}, + "delta": {{"content": "{token}"}}, + "finish_reason": "{finish_reason}" + }} + ] + }}""" + ) + + @pytest.mark.unit def test_query_chat_model_stream(): with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: @@ -125,18 +168,18 @@ def test_query_chat_model_stream(): response.status_code = 200 mock_sseclient.return_value.events.return_value = [ - Mock(data='{"choices": [{"delta": {"content": "Hello"}}]}'), - Mock(data='{"choices": [{"delta": {"content": ","}}]}'), - Mock(data='{"choices": [{"delta": {"content": " how"}}]}'), - Mock(data='{"choices": [{"delta": {"content": " are"}}]}'), - Mock(data='{"choices": [{"delta": {"content": " you"}}]}'), - Mock(data='{"choices": [{"delta": {"content": "?"}}]}'), + mock_chat_completion_stream(token="Hello"), + mock_chat_completion_stream(token=","), + mock_chat_completion_stream(token=" how"), + mock_chat_completion_stream(token=" are"), + mock_chat_completion_stream(token=" you"), + mock_chat_completion_stream(token="?"), Mock(data="[DONE]"), - Mock(data='{"choices": [{"delta": {"content": "discarded tokens"}}]}'), + mock_chat_completion_stream(token="discarded tokens"), ] mock_post.return_value = response - replies = query_chat_model_stream( + replies, metadata = query_chat_model_stream( url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback ) mock_post.assert_called_once_with( @@ -144,8 +187,10 @@ def test_query_chat_model_stream(): headers={"header": "test-header"}, data=json.dumps({"param": "test-param"}), timeout=OPENAI_TIMEOUT, + stream=True, ) assert replies == ["|Hello||,|| how|| are|| you||?|"] + assert metadata == [{"model": "test-model", "index": 0, "finish_reason": "stop"}] @pytest.mark.unit From 28956978c8ccdbe92380a0f5beed49c0b9beda17 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 31 Aug 2023 18:19:48 +0200 Subject: [PATCH 19/25] mypy --- haystack/preview/components/generators/openai/_helpers.py | 2 +- haystack/preview/components/generators/openai/chatgpt.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index a16e42a5f7..ef623fc26d 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -60,7 +60,7 @@ def default_streaming_callback(token: str, **kwargs): @openai_retry -def 
query_chat_model(url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> List[str]: +def query_chat_model(url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Tuple[List[str], Dict[str, Any]]: """ Query ChatGPT without streaming the response. diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 1ad180e2c6..9d1eb46e21 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -240,8 +240,8 @@ def run( headers["OpenAI-Organization"] = openai_organization url = f"{api_base_url}/chat/completions" - replies = [] - metadata = [] + replies: List[List[str]] = [] + metadata: List[List[Dict[str, Any]]] = [] for prompt in prompts: system_prompt, prompt = enforce_token_limit_chat( prompts=[system_prompt, prompt], From 1538d610fd97c7ead7ed0d8fe68bfac989edfb0f Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 1 Sep 2023 10:18:11 +0200 Subject: [PATCH 20/25] mypy --- haystack/preview/components/generators/openai/_helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py index ef623fc26d..9616c16fe4 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/components/generators/openai/_helpers.py @@ -60,7 +60,9 @@ def default_streaming_callback(token: str, **kwargs): @openai_retry -def query_chat_model(url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Tuple[List[str], Dict[str, Any]]: +def query_chat_model( + url: str, headers: Dict[str, str], payload: Dict[str, Any] +) -> Tuple[List[str], List[Dict[str, Any]]]: """ Query ChatGPT without streaming the response. 
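
The following patch ("extract backend from generator and make it accept chats") moves the OpenAI-specific plumbing out of ChatGPTGenerator into a reusable ChatGPTBackend that operates on chats, i.e. lists of ChatMessage dataclasses, rather than bare prompt strings. A minimal usage sketch of that backend, assuming only the module paths and class names introduced in the diff below; the API-key lookup and the example chat are illustrative and not part of the patch:

import os

from haystack.preview.llm_backends.chat_message import ChatMessage
from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend

# The backend defaults to gpt-3.5-turbo; the key is read from the environment here.
llm = ChatGPTBackend(api_key=os.environ.get("OPENAI_API_KEY"), model_name="gpt-3.5-turbo")

# A chat is a plain list of ChatMessage(content=..., role=...) objects.
chat = [
    ChatMessage(content="You are a helpful assistant.", role="system"),
    ChatMessage(content="What is the capital of France?", role="user"),
]

# query() returns the replies together with the per-choice metadata for this chat.
replies, metadata = llm.query(chat=chat)
print(replies, metadata)

ChatGPTGenerator keeps its prompt-based run() interface and simply wraps each prompt (preceded by the optional system prompt) into such a chat before delegating to the backend.
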
From 02cd61fc5d279ff736e7b49d24cb86c3b068ecad Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 1 Sep 2023 17:31:04 +0200 Subject: [PATCH 21/25] extract backend from generator and make it accept chats --- .../components/generators/openai/chatgpt.py | 178 +++------- haystack/preview/llm_backends/__init__.py | 0 haystack/preview/llm_backends/chat_message.py | 7 + .../preview/llm_backends/openai/__init__.py | 0 .../openai/_helpers.py | 74 ++-- .../preview/llm_backends/openai/chatgpt.py | 239 +++++++++++++ .../openai/errors.py | 0 .../openai/test_chatgpt_generator.py | 285 +++++----------- .../llm_backends/test_chatgpt_backend.py | 322 ++++++++++++++++++ .../test_openai_helpers.py | 19 +- 10 files changed, 744 insertions(+), 380 deletions(-) create mode 100644 haystack/preview/llm_backends/__init__.py create mode 100644 haystack/preview/llm_backends/chat_message.py create mode 100644 haystack/preview/llm_backends/openai/__init__.py rename haystack/preview/{components/generators => llm_backends}/openai/_helpers.py (77%) create mode 100644 haystack/preview/llm_backends/openai/chatgpt.py rename haystack/preview/{components/generators => llm_backends}/openai/errors.py (100%) create mode 100644 test/preview/llm_backends/test_chatgpt_backend.py rename test/preview/{components/generators/openai => llm_backends}/test_openai_helpers.py (92%) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 9d1eb46e21..811545afae 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -2,20 +2,10 @@ import logging -from haystack.preview.lazy_imports import LazyImport from haystack.preview import component, default_from_dict, default_to_dict -from haystack.preview.components.generators.openai._helpers import ( - default_streaming_callback, - query_chat_model, - query_chat_model_stream, - enforce_token_limit_chat, - OPENAI_TOKENIZERS, - OPENAI_TOKENIZERS_TOKEN_LIMITS, -) - - -with LazyImport() as tiktoken_import: - import tiktoken +from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend +from haystack.preview.llm_backends.chat_message import ChatMessage +from haystack.preview.llm_backends.openai._helpers import default_streaming_callback logger = logging.getLogger(__name__) @@ -33,11 +23,13 @@ class ChatGPTGenerator: See [OpenAI ChatGPT API](https://platform.openai.com/docs/guides/chat) for more details. """ + # TODO support function calling! + def __init__( self, api_key: Optional[str] = None, model_name: str = "gpt-3.5-turbo", - system_prompt: Optional[str] = "You are a helpful assistant.", + system_prompt: Optional[str] = None, max_tokens: Optional[int] = 500, temperature: Optional[float] = 0.7, top_p: Optional[float] = 1, @@ -82,67 +74,29 @@ def __init__( See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. """ - if not api_key: - logger.warning("OpenAI API key is missing. 
You will need to provide an API key to Pipeline.run().") - - self.api_key = api_key - self.model_name = model_name + self.llm = ChatGPTBackend( + api_key=api_key, + model_name=model_name, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + stream=stream, + streaming_callback=streaming_callback, + api_base_url=api_base_url, + openai_organization=openai_organization, + ) self.system_prompt = system_prompt - self.max_tokens = max_tokens - self.temperature = temperature - self.top_p = top_p - self.n = n - self.stop = stop or [] - self.presence_penalty = presence_penalty - self.frequency_penalty = frequency_penalty - self.logit_bias = logit_bias or {} - self.stream = stream - self.streaming_callback = streaming_callback or default_streaming_callback - - self.openai_organization = openai_organization - self.api_base_url = api_base_url - - tokenizer = None - for model_prefix, tokenizer_name in OPENAI_TOKENIZERS.items(): - if model_name.startswith(model_prefix): - tokenizer = tiktoken.get_encoding(tokenizer_name) - break - if not tokenizer: - raise ValueError(f"Tokenizer for model '{model_name}' not found.") - self.tokenizer = tokenizer - - max_tokens_limit = None - for model_prefix, limit in OPENAI_TOKENIZERS_TOKEN_LIMITS.items(): - if model_name.startswith(model_prefix): - max_tokens_limit = limit - break - if not max_tokens_limit: - raise ValueError(f"Max tokens limit for model '{model_name}' not found.") - self.max_tokens_limit = max_tokens_limit - def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary. """ - return default_to_dict( - self, - api_key=self.api_key, - model_name=self.model_name, - system_prompt=self.system_prompt, - max_tokens=self.max_tokens, - temperature=self.temperature, - top_p=self.top_p, - n=self.n, - stop=self.stop, - presence_penalty=self.presence_penalty, - frequency_penalty=self.frequency_penalty, - logit_bias=self.logit_bias, - stream=self.stream, - # FIXME how to serialize the streaming callback? - api_base_url=self.api_base_url, - openai_organization=self.openai_organization, - ) + return default_to_dict(self, system_prompt=self.system_prompt, **self.llm.to_dict()) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "ChatGPTGenerator": @@ -167,10 +121,10 @@ def run( presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, logit_bias: Optional[Dict[str, float]] = None, - api_base_url: Optional[str] = None, - openai_organization: Optional[str] = None, stream: Optional[bool] = None, streaming_callback: Optional[Callable] = None, + api_base_url: Optional[str] = None, + openai_organization: Optional[str] = None, ): """ Queries the LLM with the prompts to produce replies. @@ -204,62 +158,36 @@ def run( See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. 
""" - api_key = api_key if api_key is not None else self.api_key - model_name = model_name if model_name is not None else self.model_name - system_prompt = system_prompt if system_prompt is not None else self.system_prompt or "" - max_tokens = max_tokens if max_tokens is not None else self.max_tokens - temperature = temperature if temperature is not None else self.temperature - top_p = top_p if top_p is not None else self.top_p - n = n if n is not None else self.n - stop = stop if stop is not None else self.stop - presence_penalty = presence_penalty if presence_penalty is not None else self.presence_penalty - frequency_penalty = frequency_penalty if frequency_penalty is not None else self.frequency_penalty - logit_bias = logit_bias if logit_bias is not None else self.logit_bias - stream = stream if stream is not None else self.stream - streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback - api_base_url = api_base_url or self.api_base_url - openai_organization = openai_organization if openai_organization is not None else self.openai_organization - - if not api_key: - raise ValueError("OpenAI API key is missing. Please provide an API key.") - - parameters = { - "model": model_name, - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - "n": n, - "stream": stream, - "stop": stop, - "presence_penalty": presence_penalty, - "frequency_penalty": frequency_penalty, - "logit_bias": logit_bias, - } - headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} - if openai_organization: - headers["OpenAI-Organization"] = openai_organization - url = f"{api_base_url}/chat/completions" - - replies: List[List[str]] = [] - metadata: List[List[Dict[str, Any]]] = [] + system_prompt = system_prompt if system_prompt is not None else self.system_prompt + if system_prompt: + system_message = ChatMessage(content=system_prompt, role="system") + chats = [] for prompt in prompts: - system_prompt, prompt = enforce_token_limit_chat( - prompts=[system_prompt, prompt], - tokenizer=self.tokenizer, - max_tokens_limit=self.max_tokens_limit, - tokens_per_message_overhead=TOKENS_PER_MESSAGE_OVERHEAD, - ) - - payload = { - **parameters, - "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}], - } - if stream: - reply, meta = query_chat_model_stream( - url=url, headers=headers, payload=payload, callback=streaming_callback - ) + message = ChatMessage(content=prompt, role="user") + if system_prompt: + chats.append([system_message, message]) else: - reply, meta = query_chat_model(url=url, headers=headers, payload=payload) + chats.append([message]) + + replies, metadata = [], [] + for chat in chats: + reply, meta = self.llm.query( + chat=chat, + api_key=api_key, + model_name=model_name, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + n=n, + stop=stop, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + api_base_url=api_base_url, + openai_organization=openai_organization, + stream=stream, + streaming_callback=streaming_callback, + ) replies.append(reply) metadata.append(meta) diff --git a/haystack/preview/llm_backends/__init__.py b/haystack/preview/llm_backends/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/haystack/preview/llm_backends/chat_message.py b/haystack/preview/llm_backends/chat_message.py new file mode 100644 index 0000000000..ca20f905f3 --- /dev/null +++ 
b/haystack/preview/llm_backends/chat_message.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass + + +@dataclass +class ChatMessage: + content: str + role: str diff --git a/haystack/preview/llm_backends/openai/__init__.py b/haystack/preview/llm_backends/openai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/llm_backends/openai/_helpers.py similarity index 77% rename from haystack/preview/components/generators/openai/_helpers.py rename to haystack/preview/llm_backends/openai/_helpers.py index 9616c16fe4..bd4aaac3ca 100644 --- a/haystack/preview/components/generators/openai/_helpers.py +++ b/haystack/preview/llm_backends/openai/_helpers.py @@ -8,11 +8,8 @@ import sseclient from haystack.preview.lazy_imports import LazyImport -from haystack.preview.components.generators.openai.errors import ( - OpenAIError, - OpenAIRateLimitError, - OpenAIUnauthorizedError, -) +from haystack.preview.llm_backends.chat_message import ChatMessage +from haystack.preview.llm_backends.openai.errors import OpenAIError, OpenAIRateLimitError, OpenAIUnauthorizedError with LazyImport("Run 'pip install tiktoken'") as tiktoken_import: import tiktoken @@ -75,7 +72,6 @@ def query_chat_model( raise_for_status(response=response) json_response = json.loads(response.text) check_truncated_answers(result=json_response, payload=payload) - check_filtered_answers(result=json_response, payload=payload) metadata = [ { "model": json_response.get("model", None), @@ -121,7 +117,6 @@ def query_chat_model_stream( tokens.append(callback(token, event_data=event_data["choices"])) finally: client.close() - metadata = ( [ { @@ -134,7 +129,6 @@ def query_chat_model_stream( if event_data else [] ) - return ["".join(tokens)], metadata @@ -174,23 +168,6 @@ def check_truncated_answers(result: Dict[str, Any], payload: Dict[str, Any]): ) -def check_filtered_answers(result: Dict[str, Any], payload: Dict[str, Any]): - """ - Check the `finish_reason` the answers returned by OpenAI completions endpoint. - If the `finish_reason` is `content_filter`, log a warning to the user. - - :param result: The result returned from the OpenAI API. - :param payload: The payload sent to the OpenAI API. - """ - filtered_completions = sum(1 for ans in result["choices"] if ans["finish_reason"] == "content_filter") - if filtered_completions > 0: - logger.warning( - "%s out of the %s completions have omitted content due to a flag from OpenAI content filters.", - filtered_completions, - payload["n"], - ) - - def enforce_token_limit(prompt: str, tokenizer: "tiktoken.Encoding", max_tokens_limit: int) -> str: """ Ensure that the length of the prompt is within the max tokens limit of the model. @@ -216,36 +193,41 @@ def enforce_token_limit(prompt: str, tokenizer: "tiktoken.Encoding", max_tokens_ def enforce_token_limit_chat( - prompts: List[str], tokenizer: "tiktoken.Encoding", max_tokens_limit: int, tokens_per_message_overhead: int + chat: List[ChatMessage], tokenizer: "tiktoken.Encoding", max_tokens_limit: int, tokens_per_message_overhead: int ) -> List[str]: """ - Ensure that the length of the list of prompts is within the max tokens limit of the model. - If needed, truncate the prompts text and list so that it fits within the limit. + Ensure that the length of the chat is within the max tokens limit of the model. + If needed, truncate the messages so that the chat fits within the limit. - :param prompts: Prompts text to be sent to the generative model. 
- :param tokenizer: The tokenizer used to encode the prompt. + :param chat: The chat messages to be sent to the generative model. + :param tokenizer: The tokenizer used to encode the chat. :param max_tokens_limit: The max tokens limit of the model. :param tokens_per_message_overhead: The number of tokens that are added to the prompt text for each message. - :return: A list of prompts that fits within the max tokens limit of the model. + :return: A chat that fits within the max tokens limit of the model. """ - prompts_lens = [len(tokenizer.encode(prompt)) + tokens_per_message_overhead for prompt in prompts] - if (total_prompt_length := sum(prompts_lens)) <= max_tokens_limit: - return prompts + print(chat) + messages_len = [len(tokenizer.encode(message.content)) + tokens_per_message_overhead for message in chat] + if (total_chat_length := sum(messages_len)) <= max_tokens_limit: + return chat logger.warning( - "The prompts have been truncated from %s tokens to %s tokens to fit within the max token limit. " - "Reduce the length of the prompt to prevent it from being cut off.", - total_prompt_length, + "The chat have been truncated from %s tokens to %s tokens to fit within the max token limit. " + "Reduce the length of the chat to prevent it from being cut off.", + total_chat_length, max_tokens_limit, ) - cut_prompts = [] - cut_prompts_lens: List[int] = [] - for prompt, prompt_len in zip(prompts, prompts_lens): - if sum(cut_prompts_lens) + prompt_len <= max_tokens_limit: - cut_prompts.append(prompt) - cut_prompts_lens.append(prompt_len) + cut_messages = [] + cut_messages_len: List[int] = [] + for message, message_len in zip(chat, messages_len): + if sum(cut_messages_len) + message_len <= max_tokens_limit: + cut_messages.append(message) + cut_messages_len.append(message_len) else: - remaining_tokens = max_tokens_limit - sum(cut_prompts_lens) - cut_prompts.append(enforce_token_limit(prompt, tokenizer, remaining_tokens)) + remaining_tokens = max_tokens_limit - sum(cut_messages_len) + cut_messages.append( + ChatMessage( + content=enforce_token_limit(message.content, tokenizer, remaining_tokens), role=message.role + ) + ) break - return cut_prompts + return cut_messages diff --git a/haystack/preview/llm_backends/openai/chatgpt.py b/haystack/preview/llm_backends/openai/chatgpt.py new file mode 100644 index 0000000000..05294f6318 --- /dev/null +++ b/haystack/preview/llm_backends/openai/chatgpt.py @@ -0,0 +1,239 @@ +from typing import Optional, List, Callable, Dict, Any + +import logging +from dataclasses import asdict + +from haystack.preview.lazy_imports import LazyImport +from haystack.preview.llm_backends.chat_message import ChatMessage +from haystack.preview.llm_backends.openai._helpers import ( + default_streaming_callback, + query_chat_model, + query_chat_model_stream, + enforce_token_limit_chat, + OPENAI_TOKENIZERS, + OPENAI_TOKENIZERS_TOKEN_LIMITS, +) + + +with LazyImport() as tiktoken_import: + import tiktoken + + +logger = logging.getLogger(__name__) + + +TOKENS_PER_MESSAGE_OVERHEAD = 4 + + +class ChatGPTBackend: + """ + ChatGPT LLM interface. + + Queries ChatGPT using OpenAI's GPT-3 ChatGPT API. Invocations are made using REST API. + See [OpenAI ChatGPT API](https://platform.openai.com/docs/guides/chat) for more details. + """ + + # TODO support function calling! 
+ + def __init__( + self, + api_key: Optional[str] = None, + model_name: str = "gpt-3.5-turbo", + max_tokens: Optional[int] = 500, + temperature: Optional[float] = 0.7, + top_p: Optional[float] = 1, + n: Optional[int] = 1, + stop: Optional[List[str]] = None, + presence_penalty: Optional[float] = 0, + frequency_penalty: Optional[float] = 0, + logit_bias: Optional[Dict[str, float]] = None, + stream: bool = False, + streaming_callback: Optional[Callable] = default_streaming_callback, + api_base_url: str = "https://api.openai.com/v1", + openai_organization: Optional[str] = None, + ): + """ + Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. + + :param api_key: The OpenAI API key. + :param model_name: The name or path of the underlying model. + :param max_tokens: The maximum number of tokens the output text can have. + :param temperature: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + :param n: How many completions to generate for each prompt. + :param stop: One or more sequences where the API will stop generating further tokens. + :param presence_penalty: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + :param frequency_penalty: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. + :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + :param stream: If set to True, the API will stream the response. The streaming_callback parameter + is used to process the stream. If set to False, the response will be returned as a string. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token to be sent to the stream. If the callback function is not + provided, the token is printed to stdout. + :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. + :param openai_organization: The OpenAI organization ID. + + See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. + """ + if not api_key: + logger.warning("OpenAI API key is missing. 
You will need to provide an API key to Pipeline.run().") + + self.api_key = api_key + self.model_name = model_name + + self.max_tokens = max_tokens + self.temperature = temperature + self.top_p = top_p + self.n = n + self.stop = stop or [] + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.logit_bias = logit_bias or {} + self.stream = stream + self.streaming_callback = streaming_callback or default_streaming_callback + + self.openai_organization = openai_organization + self.api_base_url = api_base_url + + tokenizer = None + for model_prefix, tokenizer_name in OPENAI_TOKENIZERS.items(): + if model_name.startswith(model_prefix): + tokenizer = tiktoken.get_encoding(tokenizer_name) + break + if not tokenizer: + raise ValueError(f"Tokenizer for model '{model_name}' not found.") + self.tokenizer = tokenizer + + max_tokens_limit = None + for model_prefix, limit in OPENAI_TOKENIZERS_TOKEN_LIMITS.items(): + if model_name.startswith(model_prefix): + max_tokens_limit = limit + break + if not max_tokens_limit: + raise ValueError(f"Max tokens limit for model '{model_name}' not found.") + self.max_tokens_limit = max_tokens_limit + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize to a dictionary. + """ + return { + "api_key": self.api_key, + "model_name": self.model_name, + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "top_p": self.top_p, + "n": self.n, + "stop": self.stop, + "presence_penalty": self.presence_penalty, + "frequency_penalty": self.frequency_penalty, + "logit_bias": self.logit_bias, + "stream": self.stream, + # FIXME how to serialize the streaming callback? + "api_base_url": self.api_base_url, + "openai_organization": self.openai_organization, + } + + def query( + self, + chat: List[ChatMessage], + api_key: Optional[str] = None, + model_name: Optional[str] = None, + max_tokens: Optional[int] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + n: Optional[int] = None, + stop: Optional[List[str]] = None, + presence_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, + logit_bias: Optional[Dict[str, float]] = None, + api_base_url: Optional[str] = None, + openai_organization: Optional[str] = None, + stream: Optional[bool] = None, + streaming_callback: Optional[Callable] = None, + ): + """ + Queries the LLM with the prompts to produce replies. + + :param chat: The chat to be sent to the generative model. + :param api_key: The OpenAI API key. + :param model_name: The name or path of the underlying model. + :param max_tokens: The maximum number of tokens the output text can have. + :param temperature: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + :param n: How many completions to generate for each prompt. + :param stop: One or more sequences where the API will stop generating further tokens. + :param presence_penalty: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + :param frequency_penalty: What penalty to apply if a token has already been generated in the text. 
+ Bigger values mean the model will be less likely to repeat the same token in the text. + :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + :param stream: If set to True, the API will stream the response. The streaming_callback parameter + is used to process the stream. If set to False, the response will be returned as a string. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token to be sent to the stream. If the callback function is not + provided, the token is printed to stdout. + :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. + :param openai_organization: The OpenAI organization ID. + + See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. + """ + api_key = api_key if api_key is not None else self.api_key + + if not api_key: + raise ValueError("OpenAI API key is missing. Please provide an API key.") + + model_name = model_name if model_name is not None else self.model_name + max_tokens = max_tokens if max_tokens is not None else self.max_tokens + temperature = temperature if temperature is not None else self.temperature + top_p = top_p if top_p is not None else self.top_p + n = n if n is not None else self.n + stop = stop if stop is not None else self.stop + presence_penalty = presence_penalty if presence_penalty is not None else self.presence_penalty + frequency_penalty = frequency_penalty if frequency_penalty is not None else self.frequency_penalty + logit_bias = logit_bias if logit_bias is not None else self.logit_bias + stream = stream if stream is not None else self.stream + streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback + api_base_url = api_base_url or self.api_base_url + openai_organization = openai_organization if openai_organization is not None else self.openai_organization + + parameters = { + "model": model_name, + "max_tokens": max_tokens, + "temperature": temperature, + "top_p": top_p, + "n": n, + "stream": stream, + "stop": stop, + "presence_penalty": presence_penalty, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + } + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + if openai_organization: + headers["OpenAI-Organization"] = openai_organization + url = f"{api_base_url}/chat/completions" + + chat = enforce_token_limit_chat( + chat=chat, + tokenizer=self.tokenizer, + max_tokens_limit=self.max_tokens_limit, + tokens_per_message_overhead=TOKENS_PER_MESSAGE_OVERHEAD, + ) + payload = {**parameters, "messages": [asdict(message) for message in chat]} + if stream: + return query_chat_model_stream(url=url, headers=headers, payload=payload, callback=streaming_callback) + else: + return query_chat_model(url=url, headers=headers, payload=payload) diff --git a/haystack/preview/components/generators/openai/errors.py b/haystack/preview/llm_backends/openai/errors.py similarity index 100% rename from haystack/preview/components/generators/openai/errors.py rename to haystack/preview/llm_backends/openai/errors.py diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index 362e6b0971..e967836178 
100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -1,4 +1,4 @@ -from unittest.mock import patch, Mock +from unittest.mock import patch import pytest @@ -9,24 +9,24 @@ class TestChatGPTGenerator: @pytest.mark.unit def test_init_default(self, caplog): - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: component = ChatGPTGenerator() - assert component.api_key is None - assert component.model_name == "gpt-3.5-turbo" - assert component.system_prompt == "You are a helpful assistant." - assert component.max_tokens == 500 - assert component.temperature == 0.7 - assert component.top_p == 1 - assert component.n == 1 - assert component.stop == [] - assert component.presence_penalty == 0 - assert component.frequency_penalty == 0 - assert component.logit_bias == {} - assert component.stream is False - assert component.streaming_callback == default_streaming_callback - assert component.api_base_url == "https://api.openai.com/v1" - assert component.openai_organization is None - assert component.max_tokens_limit == 4097 + assert component.system_prompt is None + assert component.llm.api_key is None + assert component.llm.model_name == "gpt-3.5-turbo" + assert component.llm.max_tokens == 500 + assert component.llm.temperature == 0.7 + assert component.llm.top_p == 1 + assert component.llm.n == 1 + assert component.llm.stop == [] + assert component.llm.presence_penalty == 0 + assert component.llm.frequency_penalty == 0 + assert component.llm.logit_bias == {} + assert component.llm.stream is False + assert component.llm.streaming_callback == default_streaming_callback + assert component.llm.api_base_url == "https://api.openai.com/v1" + assert component.llm.openai_organization is None + assert component.llm.max_tokens_limit == 4097 tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") assert caplog.records[0].message == ( @@ -34,20 +34,12 @@ def test_init_default(self, caplog): ) @pytest.mark.unit - def test_init_with_parameters(self, caplog, monkeypatch): - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", - {"test-model-name": "test-encoding"}, - ) - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", - {"test-model-name": 10}, - ) - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + def test_init_with_parameters(self, caplog): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: callback = lambda x: x component = ChatGPTGenerator( api_key="test-api-key", - model_name="test-model-name", + model_name="gpt-4", system_prompt="test-system-prompt", max_tokens=20, temperature=1, @@ -62,45 +54,29 @@ def test_init_with_parameters(self, caplog, monkeypatch): api_base_url="test-base-url", openai_organization="test-orga-id", ) - assert component.api_key == "test-api-key" - assert component.model_name == "test-model-name" assert component.system_prompt == "test-system-prompt" - assert component.max_tokens == 20 - assert component.temperature == 1 - assert component.top_p == 5 - assert component.n == 10 - assert component.stop == ["test-stop-word"] - assert component.presence_penalty == 0.5 - assert component.frequency_penalty == 0.4 - assert component.logit_bias == 
{"test-logit-bias": 0.3} - assert component.stream is True - assert component.streaming_callback == callback - assert component.api_base_url == "test-base-url" - assert component.openai_organization == "test-orga-id" - assert component.max_tokens_limit == 10 + assert component.llm.api_key == "test-api-key" + assert component.llm.model_name == "gpt-4" + assert component.llm.max_tokens == 20 + assert component.llm.temperature == 1 + assert component.llm.top_p == 5 + assert component.llm.n == 10 + assert component.llm.stop == ["test-stop-word"] + assert component.llm.presence_penalty == 0.5 + assert component.llm.frequency_penalty == 0.4 + assert component.llm.logit_bias == {"test-logit-bias": 0.3} + assert component.llm.stream is True + assert component.llm.streaming_callback == callback + assert component.llm.api_base_url == "test-base-url" + assert component.llm.openai_organization == "test-orga-id" + assert component.llm.max_tokens_limit == 8192 - tiktoken_patch.get_encoding.assert_called_once_with("test-encoding") + tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") assert not caplog.records @pytest.mark.unit - def test_init_unknown_tokenizer(self): - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: - with pytest.raises(ValueError, match="Tokenizer for model 'test-another-model-name' not found."): - ChatGPTGenerator(model_name="test-another-model-name") - - @pytest.mark.unit - def test_init_unknown_token_limit(self, monkeypatch): - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", - {"test-model-name": "test-encoding"}, - ) - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: - with pytest.raises(ValueError, match="Max tokens limit for model 'test-model-name' not found."): - ChatGPTGenerator(model_name="test-model-name") - - @pytest.mark.unit - def test_to_dict_with_custom_init_parameters(self): - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + def test_to_dict_default(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: component = ChatGPTGenerator() data = component.to_dict() assert data == { @@ -108,15 +84,15 @@ def test_to_dict_with_custom_init_parameters(self): "init_parameters": { "api_key": None, "model_name": "gpt-3.5-turbo", - "system_prompt": "You are a helpful assistant.", + "system_prompt": None, "max_tokens": 500, "temperature": 0.7, "top_p": 1, "n": 1, - "stop": None, + "stop": [], "presence_penalty": 0, "frequency_penalty": 0, - "logit_bias": None, + "logit_bias": {}, "stream": False, # FIXME serialize callback? 
"api_base_url": "https://api.openai.com/v1", @@ -125,20 +101,12 @@ def test_to_dict_with_custom_init_parameters(self): } @pytest.mark.unit - def test_to_dict_with_custom_init_parameters(self, monkeypatch): - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", - {"test-model-name": "test-encoding"}, - ) - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", - {"test-model-name": 10}, - ) - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + def test_to_dict_with_parameters(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: callback = lambda x: x component = ChatGPTGenerator( api_key="test-api-key", - model_name="test-model-name", + model_name="gpt-4", system_prompt="test-system-prompt", max_tokens=20, temperature=1, @@ -158,7 +126,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "type": "ChatGPTGenerator", "init_parameters": { "api_key": "test-api-key", - "model_name": "test-model-name", + "model_name": "gpt-4", "system_prompt": "test-system-prompt", "max_tokens": 20, "temperature": 1, @@ -176,21 +144,13 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): } @pytest.mark.unit - def test_from_dict(self, monkeypatch): - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", - {"test-model-name": "test-encoding"}, - ) - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", - {"test-model-name": 10}, - ) - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + def test_from_dict(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: data = { "type": "ChatGPTGenerator", "init_parameters": { "api_key": "test-api-key", - "model_name": "test-model-name", + "model_name": "gpt-4", "system_prompt": "test-system-prompt", "max_tokens": 20, "temperature": 1, @@ -207,116 +167,57 @@ def test_from_dict(self, monkeypatch): }, } component = ChatGPTGenerator.from_dict(data) - assert component.api_key == "test-api-key" - assert component.model_name == "test-model-name" assert component.system_prompt == "test-system-prompt" - assert component.max_tokens == 20 - assert component.temperature == 1 - assert component.top_p == 5 - assert component.n == 10 - assert component.stop == ["test-stop-word"] - assert component.presence_penalty == 0.5 - assert component.frequency_penalty == 0.4 - assert component.logit_bias == {"test-logit-bias": 0.3} - assert component.stream is True - assert component.streaming_callback == default_streaming_callback - assert component.api_base_url == "test-base-url" - assert component.openai_organization == "test-orga-id" - assert component.max_tokens_limit == 10 + assert component.llm.api_key == "test-api-key" + assert component.llm.model_name == "gpt-4" + assert component.llm.max_tokens == 20 + assert component.llm.temperature == 1 + assert component.llm.top_p == 5 + assert component.llm.n == 10 + assert component.llm.stop == ["test-stop-word"] + assert component.llm.presence_penalty == 0.5 + assert component.llm.frequency_penalty == 0.4 + assert component.llm.logit_bias == {"test-logit-bias": 0.3} + assert component.llm.stream is True + assert component.llm.streaming_callback == default_streaming_callback + assert component.llm.api_base_url == "test-base-url" + assert 
component.llm.openai_organization == "test-orga-id" + assert component.llm.max_tokens_limit == 8192 @pytest.mark.unit def test_run_no_api_key(self): - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: component = ChatGPTGenerator() with pytest.raises(ValueError, match="OpenAI API key is missing. Please provide an API key."): - component.run(prompts=[]) + component.run(prompts=["test"]) @pytest.mark.unit - def test_run(self): - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: - with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model") as query_patch: - query_patch.side_effect = lambda payload, **kwargs: ( - [ - f"Response for {payload['messages'][1]['content']}", - f"Another Response for {payload['messages'][1]['content']}", - ], - [{"metadata of": payload["messages"][1]["content"]}], - ) - component = ChatGPTGenerator( - api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" - ) - - results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) - - assert results == { - "replies": [ - [f"Response for test-prompt-1", f"Another Response for test-prompt-1"], - [f"Response for test-prompt-2", f"Another Response for test-prompt-2"], - ], - "metadata": [[{"metadata of": "test-prompt-1"}], [{"metadata of": "test-prompt-2"}]], - } - query_patch.call_count == 2 - query_patch.assert_any_call( - url="test-base-url/chat/completions", - headers={ - "Authorization": f"Bearer test-api-key", - "Content-Type": "application/json", - "OpenAI-Organization": "test_orga_id", - }, - payload={ - "model": "gpt-3.5-turbo", - "max_tokens": 500, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stream": False, - "stop": [], - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": {}, - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "test-prompt-1"}, - ], - }, - ) + def test_run_no_system_prompt(self): + with patch("haystack.preview.components.generators.openai.chatgpt.ChatGPTBackend") as chatgpt_patch: + chatgpt_patch.return_value.query.side_effect = lambda chat, **kwargs: ( + [f"{msg.role}: {msg.content}" for msg in chat], + {"some_info": None}, + ) + component = ChatGPTGenerator(api_key="test-api-key") + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert results == { + "replies": [["user: test-prompt-1"], ["user: test-prompt-2"]], + "metadata": [{"some_info": None}, {"some_info": None}], + } @pytest.mark.unit - def test_run_streaming(self): - with patch("haystack.preview.components.generators.openai.chatgpt.tiktoken") as tiktoken_patch: - with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model_stream") as query_patch: - query_patch.side_effect = lambda payload, **kwargs: ( - [f"Response for {payload['messages'][1]['content']}"], - [{"metadata of": payload["messages"][1]["content"]}], - ) - callback = Mock() - component = ChatGPTGenerator(api_key="test-api-key", stream=True, streaming_callback=callback) - - results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) - - assert results == { - "replies": [["Response for test-prompt-1"], ["Response for test-prompt-2"]], - "metadata": [[{"metadata of": "test-prompt-1"}], [{"metadata of": "test-prompt-2"}]], - } - query_patch.call_count == 2 - query_patch.assert_any_call( - 
url="https://api.openai.com/v1/chat/completions", - headers={"Authorization": f"Bearer test-api-key", "Content-Type": "application/json"}, - payload={ - "model": "gpt-3.5-turbo", - "max_tokens": 500, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stream": True, - "stop": [], - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": {}, - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "test-prompt-1"}, - ], - }, - callback=callback, - ) + def test_run_with_system_prompt(self): + with patch("haystack.preview.components.generators.openai.chatgpt.ChatGPTBackend") as chatgpt_patch: + chatgpt_patch.return_value.query.side_effect = lambda chat, **kwargs: ( + [f"{msg.role}: {msg.content}" for msg in chat], + {"some_info": None}, + ) + component = ChatGPTGenerator(api_key="test-api-key", system_prompt="test-system-prompt") + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert results == { + "replies": [ + ["system: test-system-prompt", "user: test-prompt-1"], + ["system: test-system-prompt", "user: test-prompt-2"], + ], + "metadata": [{"some_info": None}, {"some_info": None}], + } diff --git a/test/preview/llm_backends/test_chatgpt_backend.py b/test/preview/llm_backends/test_chatgpt_backend.py new file mode 100644 index 0000000000..d3e87753a5 --- /dev/null +++ b/test/preview/llm_backends/test_chatgpt_backend.py @@ -0,0 +1,322 @@ +from unittest.mock import patch, Mock + +import pytest + +from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator +from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback + + +class TestChatGPTGenerator: + @pytest.mark.unit + def test_init_default(self, caplog): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + assert component.api_key is None + assert component.model_name == "gpt-3.5-turbo" + assert component.system_prompt == "You are a helpful assistant." + assert component.max_tokens == 500 + assert component.temperature == 0.7 + assert component.top_p == 1 + assert component.n == 1 + assert component.stop == [] + assert component.presence_penalty == 0 + assert component.frequency_penalty == 0 + assert component.logit_bias == {} + assert component.stream is False + assert component.streaming_callback == default_streaming_callback + assert component.api_base_url == "https://api.openai.com/v1" + assert component.openai_organization is None + assert component.max_tokens_limit == 4097 + + tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") + assert caplog.records[0].message == ( + "OpenAI API key is missing. You will need to provide an API key to Pipeline.run()." 
+ ) + + @pytest.mark.unit + def test_init_with_parameters(self, caplog, monkeypatch): + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", + {"test-model-name": "test-encoding"}, + ) + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", + {"test-model-name": 10}, + ) + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + callback = lambda x: x + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="test-model-name", + system_prompt="test-system-prompt", + max_tokens=20, + temperature=1, + top_p=5, + n=10, + stop=["test-stop-word"], + presence_penalty=0.5, + frequency_penalty=0.4, + logit_bias={"test-logit-bias": 0.3}, + stream=True, + streaming_callback=callback, + api_base_url="test-base-url", + openai_organization="test-orga-id", + ) + assert component.api_key == "test-api-key" + assert component.model_name == "test-model-name" + assert component.system_prompt == "test-system-prompt" + assert component.max_tokens == 20 + assert component.temperature == 1 + assert component.top_p == 5 + assert component.n == 10 + assert component.stop == ["test-stop-word"] + assert component.presence_penalty == 0.5 + assert component.frequency_penalty == 0.4 + assert component.logit_bias == {"test-logit-bias": 0.3} + assert component.stream is True + assert component.streaming_callback == callback + assert component.api_base_url == "test-base-url" + assert component.openai_organization == "test-orga-id" + assert component.max_tokens_limit == 10 + + tiktoken_patch.get_encoding.assert_called_once_with("test-encoding") + assert not caplog.records + + @pytest.mark.unit + def test_init_unknown_tokenizer(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + with pytest.raises(ValueError, match="Tokenizer for model 'test-another-model-name' not found."): + ChatGPTGenerator(model_name="test-another-model-name") + + @pytest.mark.unit + def test_init_unknown_token_limit(self, monkeypatch): + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", + {"test-model-name": "test-encoding"}, + ) + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + with pytest.raises(ValueError, match="Max tokens limit for model 'test-model-name' not found."): + ChatGPTGenerator(model_name="test-model-name") + + @pytest.mark.unit + def test_to_dict_with_custom_init_parameters(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": None, + "model_name": "gpt-3.5-turbo", + "system_prompt": "You are a helpful assistant.", + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stop": None, + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": None, + "stream": False, + # FIXME serialize callback? 
+ "api_base_url": "https://api.openai.com/v1", + "openai_organization": None, + }, + } + + @pytest.mark.unit + def test_to_dict_with_custom_init_parameters(self, monkeypatch): + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", + {"test-model-name": "test-encoding"}, + ) + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", + {"test-model-name": 10}, + ) + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + callback = lambda x: x + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="test-model-name", + system_prompt="test-system-prompt", + max_tokens=20, + temperature=1, + top_p=5, + n=10, + stop=["test-stop-word"], + presence_penalty=0.5, + frequency_penalty=0.4, + logit_bias={"test-logit-bias": 0.3}, + stream=True, + streaming_callback=callback, + api_base_url="test-base-url", + openai_organization="test-orga-id", + ) + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "test-model-name", + "system_prompt": "test-system-prompt", + "max_tokens": 20, + "temperature": 1, + "top_p": 5, + "n": 10, + "stop": ["test-stop-word"], + "presence_penalty": 0.5, + "frequency_penalty": 0.4, + "logit_bias": {"test-logit-bias": 0.3}, + "stream": True, + # FIXME serialize callback? + "api_base_url": "test-base-url", + "openai_organization": "test-orga-id", + }, + } + + @pytest.mark.unit + def test_from_dict(self, monkeypatch): + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", + {"test-model-name": "test-encoding"}, + ) + monkeypatch.setattr( + "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", + {"test-model-name": 10}, + ) + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + data = { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "test-model-name", + "system_prompt": "test-system-prompt", + "max_tokens": 20, + "temperature": 1, + "top_p": 5, + "n": 10, + "stop": ["test-stop-word"], + "presence_penalty": 0.5, + "frequency_penalty": 0.4, + "logit_bias": {"test-logit-bias": 0.3}, + "stream": True, + # FIXME serialize callback? + "api_base_url": "test-base-url", + "openai_organization": "test-orga-id", + }, + } + component = ChatGPTGenerator.from_dict(data) + assert component.api_key == "test-api-key" + assert component.model_name == "test-model-name" + assert component.system_prompt == "test-system-prompt" + assert component.max_tokens == 20 + assert component.temperature == 1 + assert component.top_p == 5 + assert component.n == 10 + assert component.stop == ["test-stop-word"] + assert component.presence_penalty == 0.5 + assert component.frequency_penalty == 0.4 + assert component.logit_bias == {"test-logit-bias": 0.3} + assert component.stream is True + assert component.streaming_callback == default_streaming_callback + assert component.api_base_url == "test-base-url" + assert component.openai_organization == "test-orga-id" + assert component.max_tokens_limit == 10 + + @pytest.mark.unit + def test_run_no_api_key(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + with pytest.raises(ValueError, match="OpenAI API key is missing. 
Please provide an API key."): + component.run(prompts=[]) + + @pytest.mark.unit + def test_run(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model") as query_patch: + query_patch.side_effect = lambda payload, **kwargs: ( + [ + f"Response for {payload['messages'][1]['content']}", + f"Another Response for {payload['messages'][1]['content']}", + ], + [{"metadata of": payload["messages"][1]["content"]}], + ) + component = ChatGPTGenerator( + api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" + ) + + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + + assert results == { + "replies": [ + [f"Response for test-prompt-1", f"Another Response for test-prompt-1"], + [f"Response for test-prompt-2", f"Another Response for test-prompt-2"], + ], + "metadata": [[{"metadata of": "test-prompt-1"}], [{"metadata of": "test-prompt-2"}]], + } + query_patch.call_count == 2 + query_patch.assert_any_call( + url="test-base-url/chat/completions", + headers={ + "Authorization": f"Bearer test-api-key", + "Content-Type": "application/json", + "OpenAI-Organization": "test_orga_id", + }, + payload={ + "model": "gpt-3.5-turbo", + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stream": False, + "stop": [], + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": {}, + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "test-prompt-1"}, + ], + }, + ) + + @pytest.mark.unit + def test_run_streaming(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model_stream") as query_patch: + query_patch.side_effect = lambda payload, **kwargs: ( + [f"Response for {payload['messages'][1]['content']}"], + [{"metadata of": payload["messages"][1]["content"]}], + ) + callback = Mock() + component = ChatGPTGenerator(api_key="test-api-key", stream=True, streaming_callback=callback) + + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + + assert results == { + "replies": [["Response for test-prompt-1"], ["Response for test-prompt-2"]], + "metadata": [[{"metadata of": "test-prompt-1"}], [{"metadata of": "test-prompt-2"}]], + } + query_patch.call_count == 2 + query_patch.assert_any_call( + url="https://api.openai.com/v1/chat/completions", + headers={"Authorization": f"Bearer test-api-key", "Content-Type": "application/json"}, + payload={ + "model": "gpt-3.5-turbo", + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stream": True, + "stop": [], + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": {}, + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "test-prompt-1"}, + ], + }, + callback=callback, + ) diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/llm_backends/test_openai_helpers.py similarity index 92% rename from test/preview/components/generators/openai/test_openai_helpers.py rename to test/preview/llm_backends/test_openai_helpers.py index 01c874a91d..5b3398c066 100644 --- a/test/preview/components/generators/openai/test_openai_helpers.py +++ b/test/preview/llm_backends/test_openai_helpers.py @@ -3,15 +3,10 @@ import pytest -from haystack.preview.components.generators.openai.errors import ( - 
OpenAIUnauthorizedError, - OpenAIError, - OpenAIRateLimitError, -) -from haystack.preview.components.generators.openai._helpers import ( +from haystack.preview.llm_backends.openai.errors import OpenAIUnauthorizedError, OpenAIError, OpenAIRateLimitError +from haystack.preview.llm_backends.openai._helpers import ( raise_for_status, check_truncated_answers, - check_filtered_answers, query_chat_model, query_chat_model_stream, enforce_token_limit, @@ -71,16 +66,6 @@ def test_check_truncated_answers(caplog): ) -@pytest.mark.unit -def test_check_truncated_answers(caplog): - result = {"choices": [{"finish_reason": "content_filter"}, {"finish_reason": "length"}, {"finish_reason": "stop"}]} - payload = {"n": 3} - check_filtered_answers(result, payload) - assert caplog.records[0].message == ( - "1 out of the 3 completions have omitted content due to a flag from OpenAI content filters." - ) - - @pytest.mark.unit def test_query_chat_model(): with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: From 84332c63574e3530aafb5bd4d48314d639e50c3e Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 1 Sep 2023 18:29:41 +0200 Subject: [PATCH 22/25] fix tests --- test/preview/components/conftest.py | 11 - test/preview/conftest.py | 11 +- .../llm_backends/test_chatgpt_backend.py | 226 ++++++------------ .../llm_backends/test_openai_helpers.py | 35 ++- 4 files changed, 110 insertions(+), 173 deletions(-) delete mode 100644 test/preview/components/conftest.py diff --git a/test/preview/components/conftest.py b/test/preview/components/conftest.py deleted file mode 100644 index 833ea877f7..0000000000 --- a/test/preview/components/conftest.py +++ /dev/null @@ -1,11 +0,0 @@ -from unittest.mock import patch -import pytest - - -@pytest.fixture(autouse=True) -def tenacity_wait(): - """ - Mocks tenacity's wait function to speed up tests. - """ - with patch("tenacity.nap.time"): - yield diff --git a/test/preview/conftest.py b/test/preview/conftest.py index b8abfa41a6..377370bccf 100644 --- a/test/preview/conftest.py +++ b/test/preview/conftest.py @@ -1,4 +1,4 @@ -from unittest.mock import Mock +from unittest.mock import Mock, patch import pytest @@ -11,3 +11,12 @@ def mock_tokenizer(): tokenizer.encode = lambda text: text.split() tokenizer.decode = lambda tokens: " ".join(tokens) return tokenizer + + +@pytest.fixture(autouse=True) +def tenacity_wait(): + """ + Mocks tenacity's wait function to speed up tests. + """ + with patch("tenacity.nap.time"): + yield diff --git a/test/preview/llm_backends/test_chatgpt_backend.py b/test/preview/llm_backends/test_chatgpt_backend.py index d3e87753a5..0f449e3370 100644 --- a/test/preview/llm_backends/test_chatgpt_backend.py +++ b/test/preview/llm_backends/test_chatgpt_backend.py @@ -2,18 +2,16 @@ import pytest -from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator -from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback +from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend, default_streaming_callback, ChatMessage -class TestChatGPTGenerator: +class TestChatGPTBackend: @pytest.mark.unit def test_init_default(self, caplog): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTGenerator() + component = ChatGPTBackend() assert component.api_key is None assert component.model_name == "gpt-3.5-turbo" - assert component.system_prompt == "You are a helpful assistant." 
assert component.max_tokens == 500 assert component.temperature == 0.7 assert component.top_p == 1 @@ -34,21 +32,12 @@ def test_init_default(self, caplog): ) @pytest.mark.unit - def test_init_with_parameters(self, caplog, monkeypatch): - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", - {"test-model-name": "test-encoding"}, - ) - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", - {"test-model-name": 10}, - ) + def test_init_with_parameters(self, caplog): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: callback = lambda x: x - component = ChatGPTGenerator( + component = ChatGPTBackend( api_key="test-api-key", - model_name="test-model-name", - system_prompt="test-system-prompt", + model_name="gpt-4", max_tokens=20, temperature=1, top_p=5, @@ -63,8 +52,7 @@ def test_init_with_parameters(self, caplog, monkeypatch): openai_organization="test-orga-id", ) assert component.api_key == "test-api-key" - assert component.model_name == "test-model-name" - assert component.system_prompt == "test-system-prompt" + assert component.model_name == "gpt-4" assert component.max_tokens == 20 assert component.temperature == 1 assert component.top_p == 5 @@ -77,69 +65,55 @@ def test_init_with_parameters(self, caplog, monkeypatch): assert component.streaming_callback == callback assert component.api_base_url == "test-base-url" assert component.openai_organization == "test-orga-id" - assert component.max_tokens_limit == 10 + assert component.max_tokens_limit == 8192 - tiktoken_patch.get_encoding.assert_called_once_with("test-encoding") + tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") assert not caplog.records @pytest.mark.unit def test_init_unknown_tokenizer(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: with pytest.raises(ValueError, match="Tokenizer for model 'test-another-model-name' not found."): - ChatGPTGenerator(model_name="test-another-model-name") + ChatGPTBackend(model_name="test-another-model-name") @pytest.mark.unit def test_init_unknown_token_limit(self, monkeypatch): monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", - {"test-model-name": "test-encoding"}, + "haystack.preview.llm_backends.openai.chatgpt.OPENAI_TOKENIZERS", {"test-model-name": "test-encoding"} ) with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: with pytest.raises(ValueError, match="Max tokens limit for model 'test-model-name' not found."): - ChatGPTGenerator(model_name="test-model-name") + ChatGPTBackend(model_name="test-model-name") @pytest.mark.unit - def test_to_dict_with_custom_init_parameters(self): + def test_to_dict_default(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTGenerator() + component = ChatGPTBackend() data = component.to_dict() assert data == { - "type": "ChatGPTGenerator", - "init_parameters": { - "api_key": None, - "model_name": "gpt-3.5-turbo", - "system_prompt": "You are a helpful assistant.", - "max_tokens": 500, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stop": None, - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": None, - "stream": False, - # FIXME serialize callback? 
- "api_base_url": "https://api.openai.com/v1", - "openai_organization": None, - }, + "api_key": None, + "model_name": "gpt-3.5-turbo", + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stop": [], + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": {}, + "stream": False, + # FIXME serialize callback? + "api_base_url": "https://api.openai.com/v1", + "openai_organization": None, } @pytest.mark.unit - def test_to_dict_with_custom_init_parameters(self, monkeypatch): - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", - {"test-model-name": "test-encoding"}, - ) - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", - {"test-model-name": 10}, - ) + def test_to_dict_with_parameters(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: callback = lambda x: x - component = ChatGPTGenerator( + component = ChatGPTBackend( api_key="test-api-key", - model_name="test-model-name", - system_prompt="test-system-prompt", + model_name="gpt-4", max_tokens=20, temperature=1, top_p=5, @@ -155,86 +129,33 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): ) data = component.to_dict() assert data == { - "type": "ChatGPTGenerator", - "init_parameters": { - "api_key": "test-api-key", - "model_name": "test-model-name", - "system_prompt": "test-system-prompt", - "max_tokens": 20, - "temperature": 1, - "top_p": 5, - "n": 10, - "stop": ["test-stop-word"], - "presence_penalty": 0.5, - "frequency_penalty": 0.4, - "logit_bias": {"test-logit-bias": 0.3}, - "stream": True, - # FIXME serialize callback? - "api_base_url": "test-base-url", - "openai_organization": "test-orga-id", - }, + "api_key": "test-api-key", + "model_name": "gpt-4", + "max_tokens": 20, + "temperature": 1, + "top_p": 5, + "n": 10, + "stop": ["test-stop-word"], + "presence_penalty": 0.5, + "frequency_penalty": 0.4, + "logit_bias": {"test-logit-bias": 0.3}, + "stream": True, + # FIXME serialize callback? + "api_base_url": "test-base-url", + "openai_organization": "test-orga-id", } - @pytest.mark.unit - def test_from_dict(self, monkeypatch): - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS", - {"test-model-name": "test-encoding"}, - ) - monkeypatch.setattr( - "haystack.preview.components.generators.openai.chatgpt.OPENAI_TOKENIZERS_TOKEN_LIMITS", - {"test-model-name": 10}, - ) - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - data = { - "type": "ChatGPTGenerator", - "init_parameters": { - "api_key": "test-api-key", - "model_name": "test-model-name", - "system_prompt": "test-system-prompt", - "max_tokens": 20, - "temperature": 1, - "top_p": 5, - "n": 10, - "stop": ["test-stop-word"], - "presence_penalty": 0.5, - "frequency_penalty": 0.4, - "logit_bias": {"test-logit-bias": 0.3}, - "stream": True, - # FIXME serialize callback? 
- "api_base_url": "test-base-url", - "openai_organization": "test-orga-id", - }, - } - component = ChatGPTGenerator.from_dict(data) - assert component.api_key == "test-api-key" - assert component.model_name == "test-model-name" - assert component.system_prompt == "test-system-prompt" - assert component.max_tokens == 20 - assert component.temperature == 1 - assert component.top_p == 5 - assert component.n == 10 - assert component.stop == ["test-stop-word"] - assert component.presence_penalty == 0.5 - assert component.frequency_penalty == 0.4 - assert component.logit_bias == {"test-logit-bias": 0.3} - assert component.stream is True - assert component.streaming_callback == default_streaming_callback - assert component.api_base_url == "test-base-url" - assert component.openai_organization == "test-orga-id" - assert component.max_tokens_limit == 10 - @pytest.mark.unit def test_run_no_api_key(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTGenerator() + component = ChatGPTBackend() with pytest.raises(ValueError, match="OpenAI API key is missing. Please provide an API key."): - component.run(prompts=[]) + component.query(chat=[]) @pytest.mark.unit - def test_run(self): + def test_query(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model") as query_patch: + with patch("haystack.preview.llm_backends.openai.chatgpt.query_chat_model") as query_patch: query_patch.side_effect = lambda payload, **kwargs: ( [ f"Response for {payload['messages'][1]['content']}", @@ -242,21 +163,24 @@ def test_run(self): ], [{"metadata of": payload["messages"][1]["content"]}], ) - component = ChatGPTGenerator( + component = ChatGPTBackend( api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" ) - results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + results = component.query( + chat=[ + ChatMessage(content="test-prompt-system", role="system"), + ChatMessage(content="test-prompt-user", role="user"), + ] + ) + + assert results == ( + [f"Response for test-prompt-user", f"Another Response for test-prompt-user"], + [{"metadata of": "test-prompt-user"}], + ) - assert results == { - "replies": [ - [f"Response for test-prompt-1", f"Another Response for test-prompt-1"], - [f"Response for test-prompt-2", f"Another Response for test-prompt-2"], - ], - "metadata": [[{"metadata of": "test-prompt-1"}], [{"metadata of": "test-prompt-2"}]], - } query_patch.call_count == 2 - query_patch.assert_any_call( + query_patch.assert_called_once_with( url="test-base-url/chat/completions", headers={ "Authorization": f"Bearer test-api-key", @@ -275,29 +199,31 @@ def test_run(self): "frequency_penalty": 0, "logit_bias": {}, "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "test-prompt-1"}, + {"role": "system", "content": "test-prompt-system"}, + {"role": "user", "content": "test-prompt-user"}, ], }, ) @pytest.mark.unit - def test_run_streaming(self): + def test_query_streaming(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - with patch("haystack.preview.components.generators.openai.chatgpt.query_chat_model_stream") as query_patch: + with patch("haystack.preview.llm_backends.openai.chatgpt.query_chat_model_stream") as query_patch: query_patch.side_effect = lambda payload, **kwargs: ( [f"Response for 
{payload['messages'][1]['content']}"], [{"metadata of": payload["messages"][1]["content"]}], ) callback = Mock() - component = ChatGPTGenerator(api_key="test-api-key", stream=True, streaming_callback=callback) + component = ChatGPTBackend(api_key="test-api-key", stream=True, streaming_callback=callback) - results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + results = component.query( + chat=[ + ChatMessage(content="test-prompt-system", role="system"), + ChatMessage(content="test-prompt-user", role="user"), + ] + ) - assert results == { - "replies": [["Response for test-prompt-1"], ["Response for test-prompt-2"]], - "metadata": [[{"metadata of": "test-prompt-1"}], [{"metadata of": "test-prompt-2"}]], - } + assert results == (["Response for test-prompt-user"], [{"metadata of": "test-prompt-user"}]) query_patch.call_count == 2 query_patch.assert_any_call( url="https://api.openai.com/v1/chat/completions", @@ -314,8 +240,8 @@ def test_run_streaming(self): "frequency_penalty": 0, "logit_bias": {}, "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "test-prompt-1"}, + {"role": "system", "content": "test-prompt-system"}, + {"role": "user", "content": "test-prompt-user"}, ], }, callback=callback, diff --git a/test/preview/llm_backends/test_openai_helpers.py b/test/preview/llm_backends/test_openai_helpers.py index 5b3398c066..bf64f7594e 100644 --- a/test/preview/llm_backends/test_openai_helpers.py +++ b/test/preview/llm_backends/test_openai_helpers.py @@ -5,6 +5,7 @@ from haystack.preview.llm_backends.openai.errors import OpenAIUnauthorizedError, OpenAIError, OpenAIRateLimitError from haystack.preview.llm_backends.openai._helpers import ( + ChatMessage, raise_for_status, check_truncated_answers, query_chat_model, @@ -68,7 +69,7 @@ def test_check_truncated_answers(caplog): @pytest.mark.unit def test_query_chat_model(): - with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: response = Mock() response.status_code = 200 response.text = """ @@ -113,7 +114,7 @@ def test_query_chat_model(): @pytest.mark.unit def test_query_chat_model_fail(): - with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: response = Mock() response.status_code = 500 mock_post.return_value = response @@ -145,8 +146,8 @@ def mock_chat_completion_stream(model="test-model", index=0, token="test", finis @pytest.mark.unit def test_query_chat_model_stream(): - with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: - with patch("haystack.preview.components.generators.openai._helpers.sseclient.SSEClient") as mock_sseclient: + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + with patch("haystack.preview.llm_backends.openai._helpers.sseclient.SSEClient") as mock_sseclient: callback = lambda token, event_data: f"|{token}|" response = Mock() response.status_code = 200 @@ -179,7 +180,7 @@ def test_query_chat_model_stream(): @pytest.mark.unit def test_query_chat_model_stream_fail(): - with patch("haystack.preview.components.generators.openai._helpers.requests.post") as mock_post: + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: callback = Mock() response = Mock() response.status_code = 500 
@@ -217,25 +218,37 @@ def test_enforce_token_limit_below_limit(caplog, mock_tokenizer): @pytest.mark.unit def test_enforce_token_limit_chat_above_limit(caplog, mock_tokenizer): prompts = enforce_token_limit_chat( - ["System Prompt", "This is a test prompt."], + [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ], tokenizer=mock_tokenizer, max_tokens_limit=7, tokens_per_message_overhead=2, ) - assert prompts == ["System Prompt", "This is a"] + assert prompts == [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a", role="user"), + ] assert caplog.records[0].message == ( - "The prompts have been truncated from 11 tokens to 7 tokens to fit within the max token limit. " - "Reduce the length of the prompt to prevent it from being cut off." + "The chat have been truncated from 11 tokens to 7 tokens to fit within the max token limit. " + "Reduce the length of the chat to prevent it from being cut off." ) @pytest.mark.unit def test_enforce_token_limit_chat_below_limit(caplog, mock_tokenizer): prompts = enforce_token_limit_chat( - ["System Prompt", "This is a test prompt."], + [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ], tokenizer=mock_tokenizer, max_tokens_limit=100, tokens_per_message_overhead=2, ) - assert prompts == ["System Prompt", "This is a test prompt."] + assert prompts == [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ] assert not caplog.records From 329b54d917600f5620eb33794d240817f8276ff7 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 10:01:09 +0200 Subject: [PATCH 23/25] mypy --- haystack/preview/llm_backends/openai/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/preview/llm_backends/openai/_helpers.py b/haystack/preview/llm_backends/openai/_helpers.py index bd4aaac3ca..9dde131b35 100644 --- a/haystack/preview/llm_backends/openai/_helpers.py +++ b/haystack/preview/llm_backends/openai/_helpers.py @@ -194,7 +194,7 @@ def enforce_token_limit(prompt: str, tokenizer: "tiktoken.Encoding", max_tokens_ def enforce_token_limit_chat( chat: List[ChatMessage], tokenizer: "tiktoken.Encoding", max_tokens_limit: int, tokens_per_message_overhead: int -) -> List[str]: +) -> List[ChatMessage]: """ Ensure that the length of the chat is within the max tokens limit of the model. If needed, truncate the messages so that the chat fits within the limit. 
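
For reference, a minimal usage sketch of the ChatMessage-based truncation that the tests above pin down. It assumes the haystack.preview modules from this branch are importable and uses a toy whitespace tokenizer equivalent to the mock_tokenizer fixture; it is an illustrative sketch, not part of the patch series.

from unittest.mock import Mock

from haystack.preview.llm_backends.openai._helpers import ChatMessage, enforce_token_limit_chat

# Toy tokenizer with the same shape as the mock_tokenizer fixture: one token per whitespace-separated word.
tokenizer = Mock()
tokenizer.encode = lambda text: text.split()
tokenizer.decode = lambda tokens: " ".join(tokens)

chat = [
    ChatMessage(content="System Prompt", role="system"),
    ChatMessage(content="This is a test prompt.", role="user"),
]

# With a 7-token budget and 2 tokens of per-message overhead, the user message is cut down so the whole
# chat fits, matching test_enforce_token_limit_chat_above_limit above (a warning is also logged).
truncated = enforce_token_limit_chat(chat, tokenizer=tokenizer, max_tokens_limit=7, tokens_per_message_overhead=2)
print([message.content for message in truncated])  # expected: ['System Prompt', 'This is a']

Returning ChatMessage objects rather than plain strings (as fixed in the type annotation below) keeps the role attached to each truncated message.
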
From 5ee2aacab8f3f16bb22989415089b04ed84ba6e3 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 10:29:16 +0200 Subject: [PATCH 24/25] query->complete --- .../components/generators/openai/chatgpt.py | 2 +- .../preview/llm_backends/openai/_helpers.py | 9 +++---- .../preview/llm_backends/openai/chatgpt.py | 10 +++---- .../openai/test_chatgpt_generator.py | 4 +-- .../llm_backends/test_chatgpt_backend.py | 26 +++++++++---------- .../llm_backends/test_openai_helpers.py | 14 +++++----- 6 files changed, 31 insertions(+), 34 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 811545afae..afd61e2884 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -171,7 +171,7 @@ def run( replies, metadata = [], [] for chat in chats: - reply, meta = self.llm.query( + reply, meta = self.llm.complete( chat=chat, api_key=api_key, model_name=model_name, diff --git a/haystack/preview/llm_backends/openai/_helpers.py b/haystack/preview/llm_backends/openai/_helpers.py index 9dde131b35..a1602131ea 100644 --- a/haystack/preview/llm_backends/openai/_helpers.py +++ b/haystack/preview/llm_backends/openai/_helpers.py @@ -57,9 +57,7 @@ def default_streaming_callback(token: str, **kwargs): @openai_retry -def query_chat_model( - url: str, headers: Dict[str, str], payload: Dict[str, Any] -) -> Tuple[List[str], List[Dict[str, Any]]]: +def complete(url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Tuple[List[str], List[Dict[str, Any]]]: """ Query ChatGPT without streaming the response. @@ -81,11 +79,12 @@ def query_chat_model( } for choice in json_response.get("choices", []) ] - return [choice["message"]["content"].strip() for choice in json_response.get("choices", [])], metadata + replies = [choice["message"]["content"].strip() for choice in json_response.get("choices", [])] + return replies, metadata @openai_retry -def query_chat_model_stream( +def complete_stream( url: str, headers: Dict[str, str], payload: Dict[str, Any], callback: Callable ) -> Tuple[List[str], List[Dict[str, Any]]]: """ diff --git a/haystack/preview/llm_backends/openai/chatgpt.py b/haystack/preview/llm_backends/openai/chatgpt.py index 05294f6318..d416f6e453 100644 --- a/haystack/preview/llm_backends/openai/chatgpt.py +++ b/haystack/preview/llm_backends/openai/chatgpt.py @@ -7,8 +7,8 @@ from haystack.preview.llm_backends.chat_message import ChatMessage from haystack.preview.llm_backends.openai._helpers import ( default_streaming_callback, - query_chat_model, - query_chat_model_stream, + complete, + complete_stream, enforce_token_limit_chat, OPENAI_TOKENIZERS, OPENAI_TOKENIZERS_TOKEN_LIMITS, @@ -141,7 +141,7 @@ def to_dict(self) -> Dict[str, Any]: "openai_organization": self.openai_organization, } - def query( + def complete( self, chat: List[ChatMessage], api_key: Optional[str] = None, @@ -234,6 +234,6 @@ def query( ) payload = {**parameters, "messages": [asdict(message) for message in chat]} if stream: - return query_chat_model_stream(url=url, headers=headers, payload=payload, callback=streaming_callback) + return complete_stream(url=url, headers=headers, payload=payload, callback=streaming_callback) else: - return query_chat_model(url=url, headers=headers, payload=payload) + return complete(url=url, headers=headers, payload=payload) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py 
b/test/preview/components/generators/openai/test_chatgpt_generator.py index e967836178..be1597e289 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -194,7 +194,7 @@ def test_run_no_api_key(self): @pytest.mark.unit def test_run_no_system_prompt(self): with patch("haystack.preview.components.generators.openai.chatgpt.ChatGPTBackend") as chatgpt_patch: - chatgpt_patch.return_value.query.side_effect = lambda chat, **kwargs: ( + chatgpt_patch.return_value.complete.side_effect = lambda chat, **kwargs: ( [f"{msg.role}: {msg.content}" for msg in chat], {"some_info": None}, ) @@ -208,7 +208,7 @@ def test_run_no_system_prompt(self): @pytest.mark.unit def test_run_with_system_prompt(self): with patch("haystack.preview.components.generators.openai.chatgpt.ChatGPTBackend") as chatgpt_patch: - chatgpt_patch.return_value.query.side_effect = lambda chat, **kwargs: ( + chatgpt_patch.return_value.complete.side_effect = lambda chat, **kwargs: ( [f"{msg.role}: {msg.content}" for msg in chat], {"some_info": None}, ) diff --git a/test/preview/llm_backends/test_chatgpt_backend.py b/test/preview/llm_backends/test_chatgpt_backend.py index 0f449e3370..58a78b6756 100644 --- a/test/preview/llm_backends/test_chatgpt_backend.py +++ b/test/preview/llm_backends/test_chatgpt_backend.py @@ -150,13 +150,13 @@ def test_run_no_api_key(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: component = ChatGPTBackend() with pytest.raises(ValueError, match="OpenAI API key is missing. Please provide an API key."): - component.query(chat=[]) + component.complete(chat=[]) @pytest.mark.unit - def test_query(self): + def test_complete(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - with patch("haystack.preview.llm_backends.openai.chatgpt.query_chat_model") as query_patch: - query_patch.side_effect = lambda payload, **kwargs: ( + with patch("haystack.preview.llm_backends.openai.chatgpt.complete") as complete_patch: + complete_patch.side_effect = lambda payload, **kwargs: ( [ f"Response for {payload['messages'][1]['content']}", f"Another Response for {payload['messages'][1]['content']}", @@ -167,7 +167,7 @@ def test_query(self): api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" ) - results = component.query( + results = component.complete( chat=[ ChatMessage(content="test-prompt-system", role="system"), ChatMessage(content="test-prompt-user", role="user"), @@ -179,8 +179,8 @@ def test_query(self): [{"metadata of": "test-prompt-user"}], ) - query_patch.call_count == 2 - query_patch.assert_called_once_with( + complete_patch.call_count == 2 + complete_patch.assert_called_once_with( url="test-base-url/chat/completions", headers={ "Authorization": f"Bearer test-api-key", @@ -206,17 +206,17 @@ def test_query(self): ) @pytest.mark.unit - def test_query_streaming(self): + def test_complete_streaming(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - with patch("haystack.preview.llm_backends.openai.chatgpt.query_chat_model_stream") as query_patch: - query_patch.side_effect = lambda payload, **kwargs: ( + with patch("haystack.preview.llm_backends.openai.chatgpt.complete_stream") as complete_stream_patch: + complete_stream_patch.side_effect = lambda payload, **kwargs: ( [f"Response for {payload['messages'][1]['content']}"], [{"metadata of": 
payload["messages"][1]["content"]}], ) callback = Mock() component = ChatGPTBackend(api_key="test-api-key", stream=True, streaming_callback=callback) - results = component.query( + results = component.complete( chat=[ ChatMessage(content="test-prompt-system", role="system"), ChatMessage(content="test-prompt-user", role="user"), @@ -224,8 +224,8 @@ def test_query_streaming(self): ) assert results == (["Response for test-prompt-user"], [{"metadata of": "test-prompt-user"}]) - query_patch.call_count == 2 - query_patch.assert_any_call( + complete_stream_patch.call_count == 2 + complete_stream_patch.assert_any_call( url="https://api.openai.com/v1/chat/completions", headers={"Authorization": f"Bearer test-api-key", "Content-Type": "application/json"}, payload={ diff --git a/test/preview/llm_backends/test_openai_helpers.py b/test/preview/llm_backends/test_openai_helpers.py index bf64f7594e..736d7f3dd5 100644 --- a/test/preview/llm_backends/test_openai_helpers.py +++ b/test/preview/llm_backends/test_openai_helpers.py @@ -8,8 +8,8 @@ ChatMessage, raise_for_status, check_truncated_answers, - query_chat_model, - query_chat_model_stream, + complete, + complete_stream, enforce_token_limit, enforce_token_limit_chat, OPENAI_TIMEOUT, @@ -90,9 +90,7 @@ def test_query_chat_model(): }""" mock_post.return_value = response - replies, metadata = query_chat_model( - url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"} - ) + replies, metadata = complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) mock_post.assert_called_once_with( "test-url", headers={"header": "test-header"}, @@ -119,7 +117,7 @@ def test_query_chat_model_fail(): response.status_code = 500 mock_post.return_value = response with pytest.raises(OpenAIError): - query_chat_model(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) + complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) mock_post.assert_called_with( "test-url", headers={"header": "test-header"}, @@ -164,7 +162,7 @@ def test_query_chat_model_stream(): ] mock_post.return_value = response - replies, metadata = query_chat_model_stream( + replies, metadata = complete_stream( url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback ) mock_post.assert_called_once_with( @@ -186,7 +184,7 @@ def test_query_chat_model_stream_fail(): response.status_code = 500 mock_post.return_value = response with pytest.raises(OpenAIError): - query_chat_model_stream( + complete_stream( url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback ) mock_post.assert_called_with( From 429a3ae231f19e48d40b4b3ccee658bc6cec568e Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 10:33:46 +0200 Subject: [PATCH 25/25] mypy --- haystack/preview/llm_backends/openai/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/preview/llm_backends/openai/_helpers.py b/haystack/preview/llm_backends/openai/_helpers.py index a1602131ea..1b446e319b 100644 --- a/haystack/preview/llm_backends/openai/_helpers.py +++ b/haystack/preview/llm_backends/openai/_helpers.py @@ -102,7 +102,7 @@ def complete_stream( response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT, stream=True) raise_for_status(response=response) - client = sseclient.SSEClient(response) + client = sseclient.SSEClient(response) # type: ignore event_data = None tokens = [] 
try: