From a243cae10bd1420b1284410b89f79eba62399f40 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 15:12:05 +0200 Subject: [PATCH 01/37] chatgpt backend --- haystack/preview/__init__.py | 2 +- haystack/preview/llm_backends/__init__.py | 0 haystack/preview/llm_backends/chat_message.py | 7 + .../preview/llm_backends/openai/__init__.py | 0 .../preview/llm_backends/openai/_helpers.py | 232 ++++++++++++++++ .../preview/llm_backends/openai/chatgpt.py | 239 +++++++++++++++++ .../preview/llm_backends/openai/errors.py | 35 +++ test/preview/conftest.py | 11 +- .../llm_backends/test_chatgpt_backend.py | 248 +++++++++++++++++ .../llm_backends/test_openai_helpers.py | 252 ++++++++++++++++++ 10 files changed, 1024 insertions(+), 2 deletions(-) create mode 100644 haystack/preview/llm_backends/__init__.py create mode 100644 haystack/preview/llm_backends/chat_message.py create mode 100644 haystack/preview/llm_backends/openai/__init__.py create mode 100644 haystack/preview/llm_backends/openai/_helpers.py create mode 100644 haystack/preview/llm_backends/openai/chatgpt.py create mode 100644 haystack/preview/llm_backends/openai/errors.py create mode 100644 test/preview/llm_backends/test_chatgpt_backend.py create mode 100644 test/preview/llm_backends/test_openai_helpers.py diff --git a/haystack/preview/__init__.py b/haystack/preview/__init__.py index 36f7de744f..a5f9ed225c 100644 --- a/haystack/preview/__init__.py +++ b/haystack/preview/__init__.py @@ -1,4 +1,4 @@ from canals import component, Pipeline from canals.serialization import default_from_dict, default_to_dict -from canals.errors import DeserializationError +from canals.errors import DeserializationError, ComponentError from haystack.preview.dataclasses import * diff --git a/haystack/preview/llm_backends/__init__.py b/haystack/preview/llm_backends/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/haystack/preview/llm_backends/chat_message.py b/haystack/preview/llm_backends/chat_message.py new file mode 100644 index 0000000000..ca20f905f3 --- /dev/null +++ b/haystack/preview/llm_backends/chat_message.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass + + +@dataclass +class ChatMessage: + content: str + role: str diff --git a/haystack/preview/llm_backends/openai/__init__.py b/haystack/preview/llm_backends/openai/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/haystack/preview/llm_backends/openai/_helpers.py b/haystack/preview/llm_backends/openai/_helpers.py new file mode 100644 index 0000000000..1b446e319b --- /dev/null +++ b/haystack/preview/llm_backends/openai/_helpers.py @@ -0,0 +1,232 @@ +from typing import List, Callable, Dict, Any, Tuple +import os +import logging +import json + +import tenacity +import requests +import sseclient + +from haystack.preview.lazy_imports import LazyImport +from haystack.preview.llm_backends.chat_message import ChatMessage +from haystack.preview.llm_backends.openai.errors import OpenAIError, OpenAIRateLimitError, OpenAIUnauthorizedError + +with LazyImport("Run 'pip install tiktoken'") as tiktoken_import: + import tiktoken + + +logger = logging.getLogger(__name__) + + +OPENAI_TIMEOUT = float(os.environ.get("HAYSTACK_REMOTE_API_TIMEOUT_SEC", 30)) +OPENAI_BACKOFF = int(os.environ.get("HAYSTACK_REMOTE_API_BACKOFF_SEC", 10)) +OPENAI_MAX_RETRIES = int(os.environ.get("HAYSTACK_REMOTE_API_MAX_RETRIES", 5)) +OPENAI_TOKENIZERS = { + **tiktoken.model.MODEL_TO_ENCODING, + "gpt-35-turbo": "cl100k_base", # https://github.com/openai/tiktoken/pull/72 +} 
+OPENAI_TOKENIZERS_TOKEN_LIMITS = { + "text-davinci": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3 + "gpt-35-turbo": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-3.5-turbo": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-3.5-turbo-16k": 16384, # Ref: https://platform.openai.com/docs/models/gpt-3-5 + "gpt-3": 4096, # Ref: https://platform.openai.com/docs/models/gpt-3 + "gpt-4-32k": 32768, # Ref: https://platform.openai.com/docs/models/gpt-4 + "gpt-4": 8192, # Ref: https://platform.openai.com/docs/models/gpt-4 +} +OPENAI_STREAMING_DONE_MARKER = "[DONE]" # Ref: https://platform.openai.com/docs/api-reference/chat/create#stream + + +#: Retry on OpenAI errors +openai_retry = tenacity.retry( + reraise=True, + retry=tenacity.retry_if_exception_type(OpenAIError) + and tenacity.retry_if_not_exception_type(OpenAIUnauthorizedError), + wait=tenacity.wait_exponential(multiplier=OPENAI_BACKOFF), + stop=tenacity.stop_after_attempt(OPENAI_MAX_RETRIES), +) + + +def default_streaming_callback(token: str, **kwargs): + """ + Default callback function for streaming responses from OpenAI API. + Prints the tokens to stdout as soon as they are received and returns them. + """ + print(token, flush=True, end="") + return token + + +@openai_retry +def complete(url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Tuple[List[str], List[Dict[str, Any]]]: + """ + Query ChatGPT without streaming the response. + + :param url: The URL to query. + :param headers: The headers to send with the request. + :param payload: The payload to send with the request. + :return: A list of strings containing the response from the OpenAI API. + """ + response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) + raise_for_status(response=response) + json_response = json.loads(response.text) + check_truncated_answers(result=json_response, payload=payload) + metadata = [ + { + "model": json_response.get("model", None), + "index": choice.get("index", None), + "finish_reason": choice.get("finish_reason", None), + **json_response.get("usage", {}), + } + for choice in json_response.get("choices", []) + ] + replies = [choice["message"]["content"].strip() for choice in json_response.get("choices", [])] + return replies, metadata + + +@openai_retry +def complete_stream( + url: str, headers: Dict[str, str], payload: Dict[str, Any], callback: Callable +) -> Tuple[List[str], List[Dict[str, Any]]]: + """ + Query ChatGPT and streams the response. Once the stream finishes, returns a list of strings just like + self._query_llm() + + :param url: The URL to query. + :param headers: The headers to send with the request. + :param payload: The payload to send with the request. + :param callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token that will be returned at the end of the streaming. + :return: A list of strings containing the response from the OpenAI API. 
+ """ + response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT, stream=True) + raise_for_status(response=response) + + client = sseclient.SSEClient(response) # type: ignore + event_data = None + tokens = [] + try: + for event in client.events(): + if event.data == OPENAI_STREAMING_DONE_MARKER: + break + event_data = json.loads(event.data) + delta = event_data["choices"][0]["delta"] + token = delta["content"] if "content" in delta else None + if token: + tokens.append(callback(token, event_data=event_data["choices"])) + finally: + client.close() + metadata = ( + [ + { + "model": event_data.get("model", None), + "index": choice.get("index", None), + "finish_reason": choice.get("finish_reason", None), + } + for choice in event_data.get("choices", []) + ] + if event_data + else [] + ) + return ["".join(tokens)], metadata + + +def raise_for_status(response: requests.Response): + """ + Raises the appropriate OpenAI error in case of a bad response. + + :param response: The response returned from the OpenAI API. + :raises OpenAIError: If the response status code is not 200. + """ + if response.status_code >= 400: + if response.status_code == 429: + raise OpenAIRateLimitError(f"API rate limit exceeded: {response.text}") + if response.status_code == 401: + raise OpenAIUnauthorizedError(f"API key is invalid: {response.text}") + raise OpenAIError( + f"OpenAI returned an error.\n" f"Status code: {response.status_code}\n" f"Response body: {response.text}", + status_code=response.status_code, + ) + + +def check_truncated_answers(result: Dict[str, Any], payload: Dict[str, Any]): + """ + Check the `finish_reason` the answers returned by OpenAI completions endpoint. + If the `finish_reason` is `length`, log a warning to the user. + + :param result: The result returned from the OpenAI API. + :param payload: The payload sent to the OpenAI API. + """ + truncated_completions = sum(1 for ans in result["choices"] if ans["finish_reason"] == "length") + if truncated_completions > 0: + logger.warning( + "%s out of the %s completions have been truncated before reaching a natural stopping point. " + "Increase the max_tokens parameter to allow for longer completions.", + truncated_completions, + payload["n"], + ) + + +def enforce_token_limit(prompt: str, tokenizer: "tiktoken.Encoding", max_tokens_limit: int) -> str: + """ + Ensure that the length of the prompt is within the max tokens limit of the model. + If needed, truncate the prompt text so that it fits within the limit. + + :param prompt: Prompt text to be sent to the generative model. + :param tokenizer: The tokenizer used to encode the prompt. + :param max_tokens_limit: The max tokens limit of the model. + :return: The prompt text that fits within the max tokens limit of the model. + """ + tiktoken_import.check() + tokens = tokenizer.encode(prompt) + tokens_count = len(tokens) + if tokens_count > max_tokens_limit: + logger.warning( + "The prompt has been truncated from %s tokens to %s tokens to fit within the max token limit. " + "Reduce the length of the prompt to prevent it from being cut off.", + tokens_count, + max_tokens_limit, + ) + prompt = tokenizer.decode(tokens[:max_tokens_limit]) + return prompt + + +def enforce_token_limit_chat( + chat: List[ChatMessage], tokenizer: "tiktoken.Encoding", max_tokens_limit: int, tokens_per_message_overhead: int +) -> List[ChatMessage]: + """ + Ensure that the length of the chat is within the max tokens limit of the model. 
+ If needed, truncate the messages so that the chat fits within the limit. + + :param chat: The chat messages to be sent to the generative model. + :param tokenizer: The tokenizer used to encode the chat. + :param max_tokens_limit: The max tokens limit of the model. + :param tokens_per_message_overhead: The number of tokens that are added to the prompt text for each message. + :return: A chat that fits within the max tokens limit of the model. + """ + print(chat) + messages_len = [len(tokenizer.encode(message.content)) + tokens_per_message_overhead for message in chat] + if (total_chat_length := sum(messages_len)) <= max_tokens_limit: + return chat + + logger.warning( + "The chat have been truncated from %s tokens to %s tokens to fit within the max token limit. " + "Reduce the length of the chat to prevent it from being cut off.", + total_chat_length, + max_tokens_limit, + ) + cut_messages = [] + cut_messages_len: List[int] = [] + for message, message_len in zip(chat, messages_len): + if sum(cut_messages_len) + message_len <= max_tokens_limit: + cut_messages.append(message) + cut_messages_len.append(message_len) + else: + remaining_tokens = max_tokens_limit - sum(cut_messages_len) + cut_messages.append( + ChatMessage( + content=enforce_token_limit(message.content, tokenizer, remaining_tokens), role=message.role + ) + ) + break + return cut_messages diff --git a/haystack/preview/llm_backends/openai/chatgpt.py b/haystack/preview/llm_backends/openai/chatgpt.py new file mode 100644 index 0000000000..d416f6e453 --- /dev/null +++ b/haystack/preview/llm_backends/openai/chatgpt.py @@ -0,0 +1,239 @@ +from typing import Optional, List, Callable, Dict, Any + +import logging +from dataclasses import asdict + +from haystack.preview.lazy_imports import LazyImport +from haystack.preview.llm_backends.chat_message import ChatMessage +from haystack.preview.llm_backends.openai._helpers import ( + default_streaming_callback, + complete, + complete_stream, + enforce_token_limit_chat, + OPENAI_TOKENIZERS, + OPENAI_TOKENIZERS_TOKEN_LIMITS, +) + + +with LazyImport() as tiktoken_import: + import tiktoken + + +logger = logging.getLogger(__name__) + + +TOKENS_PER_MESSAGE_OVERHEAD = 4 + + +class ChatGPTBackend: + """ + ChatGPT LLM interface. + + Queries ChatGPT using OpenAI's GPT-3 ChatGPT API. Invocations are made using REST API. + See [OpenAI ChatGPT API](https://platform.openai.com/docs/guides/chat) for more details. + """ + + # TODO support function calling! + + def __init__( + self, + api_key: Optional[str] = None, + model_name: str = "gpt-3.5-turbo", + max_tokens: Optional[int] = 500, + temperature: Optional[float] = 0.7, + top_p: Optional[float] = 1, + n: Optional[int] = 1, + stop: Optional[List[str]] = None, + presence_penalty: Optional[float] = 0, + frequency_penalty: Optional[float] = 0, + logit_bias: Optional[Dict[str, float]] = None, + stream: bool = False, + streaming_callback: Optional[Callable] = default_streaming_callback, + api_base_url: str = "https://api.openai.com/v1", + openai_organization: Optional[str] = None, + ): + """ + Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. + + :param api_key: The OpenAI API key. + :param model_name: The name or path of the underlying model. + :param max_tokens: The maximum number of tokens the output text can have. + :param temperature: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. 
+ :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + :param n: How many completions to generate for each prompt. + :param stop: One or more sequences where the API will stop generating further tokens. + :param presence_penalty: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + :param frequency_penalty: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. + :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + :param stream: If set to True, the API will stream the response. The streaming_callback parameter + is used to process the stream. If set to False, the response will be returned as a string. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token to be sent to the stream. If the callback function is not + provided, the token is printed to stdout. + :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. + :param openai_organization: The OpenAI organization ID. + + See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. + """ + if not api_key: + logger.warning("OpenAI API key is missing. You will need to provide an API key to Pipeline.run().") + + self.api_key = api_key + self.model_name = model_name + + self.max_tokens = max_tokens + self.temperature = temperature + self.top_p = top_p + self.n = n + self.stop = stop or [] + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.logit_bias = logit_bias or {} + self.stream = stream + self.streaming_callback = streaming_callback or default_streaming_callback + + self.openai_organization = openai_organization + self.api_base_url = api_base_url + + tokenizer = None + for model_prefix, tokenizer_name in OPENAI_TOKENIZERS.items(): + if model_name.startswith(model_prefix): + tokenizer = tiktoken.get_encoding(tokenizer_name) + break + if not tokenizer: + raise ValueError(f"Tokenizer for model '{model_name}' not found.") + self.tokenizer = tokenizer + + max_tokens_limit = None + for model_prefix, limit in OPENAI_TOKENIZERS_TOKEN_LIMITS.items(): + if model_name.startswith(model_prefix): + max_tokens_limit = limit + break + if not max_tokens_limit: + raise ValueError(f"Max tokens limit for model '{model_name}' not found.") + self.max_tokens_limit = max_tokens_limit + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize to a dictionary. + """ + return { + "api_key": self.api_key, + "model_name": self.model_name, + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "top_p": self.top_p, + "n": self.n, + "stop": self.stop, + "presence_penalty": self.presence_penalty, + "frequency_penalty": self.frequency_penalty, + "logit_bias": self.logit_bias, + "stream": self.stream, + # FIXME how to serialize the streaming callback? 
+ "api_base_url": self.api_base_url, + "openai_organization": self.openai_organization, + } + + def complete( + self, + chat: List[ChatMessage], + api_key: Optional[str] = None, + model_name: Optional[str] = None, + max_tokens: Optional[int] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + n: Optional[int] = None, + stop: Optional[List[str]] = None, + presence_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, + logit_bias: Optional[Dict[str, float]] = None, + api_base_url: Optional[str] = None, + openai_organization: Optional[str] = None, + stream: Optional[bool] = None, + streaming_callback: Optional[Callable] = None, + ): + """ + Queries the LLM with the prompts to produce replies. + + :param chat: The chat to be sent to the generative model. + :param api_key: The OpenAI API key. + :param model_name: The name or path of the underlying model. + :param max_tokens: The maximum number of tokens the output text can have. + :param temperature: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + :param n: How many completions to generate for each prompt. + :param stop: One or more sequences where the API will stop generating further tokens. + :param presence_penalty: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + :param frequency_penalty: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. + :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + :param stream: If set to True, the API will stream the response. The streaming_callback parameter + is used to process the stream. If set to False, the response will be returned as a string. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token to be sent to the stream. If the callback function is not + provided, the token is printed to stdout. + :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. + :param openai_organization: The OpenAI organization ID. + + See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. + """ + api_key = api_key if api_key is not None else self.api_key + + if not api_key: + raise ValueError("OpenAI API key is missing. 
Please provide an API key.") + + model_name = model_name if model_name is not None else self.model_name + max_tokens = max_tokens if max_tokens is not None else self.max_tokens + temperature = temperature if temperature is not None else self.temperature + top_p = top_p if top_p is not None else self.top_p + n = n if n is not None else self.n + stop = stop if stop is not None else self.stop + presence_penalty = presence_penalty if presence_penalty is not None else self.presence_penalty + frequency_penalty = frequency_penalty if frequency_penalty is not None else self.frequency_penalty + logit_bias = logit_bias if logit_bias is not None else self.logit_bias + stream = stream if stream is not None else self.stream + streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback + api_base_url = api_base_url or self.api_base_url + openai_organization = openai_organization if openai_organization is not None else self.openai_organization + + parameters = { + "model": model_name, + "max_tokens": max_tokens, + "temperature": temperature, + "top_p": top_p, + "n": n, + "stream": stream, + "stop": stop, + "presence_penalty": presence_penalty, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + } + headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} + if openai_organization: + headers["OpenAI-Organization"] = openai_organization + url = f"{api_base_url}/chat/completions" + + chat = enforce_token_limit_chat( + chat=chat, + tokenizer=self.tokenizer, + max_tokens_limit=self.max_tokens_limit, + tokens_per_message_overhead=TOKENS_PER_MESSAGE_OVERHEAD, + ) + payload = {**parameters, "messages": [asdict(message) for message in chat]} + if stream: + return complete_stream(url=url, headers=headers, payload=payload, callback=streaming_callback) + else: + return complete(url=url, headers=headers, payload=payload) diff --git a/haystack/preview/llm_backends/openai/errors.py b/haystack/preview/llm_backends/openai/errors.py new file mode 100644 index 0000000000..1787b4e17a --- /dev/null +++ b/haystack/preview/llm_backends/openai/errors.py @@ -0,0 +1,35 @@ +from typing import Optional +from haystack.preview import ComponentError + + +class OpenAIError(ComponentError): + """Exception for issues that occur in the OpenAI APIs""" + + def __init__(self, message: Optional[str] = None, status_code: Optional[int] = None): + super().__init__() + self.message = message + self.status_code = status_code + + def __str__(self): + return self.message + f"(status code {self.status_code})" if self.status_code else "" + + +class OpenAIRateLimitError(OpenAIError): + """ + Rate limit error for OpenAI API (status code 429) + See https://help.openai.com/en/articles/5955604-how-can-i-solve-429-too-many-requests-errors + See https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits + """ + + def __init__(self, message: Optional[str] = None): + super().__init__(message=message, status_code=429) + + +class OpenAIUnauthorizedError(OpenAIError): + """ + Unauthorized error for OpenAI API (status code 401) + See https://platform.openai.com/docs/guides/error-codes/api-errors + """ + + def __init__(self, message: Optional[str] = None): + super().__init__(message=message, status_code=401) diff --git a/test/preview/conftest.py b/test/preview/conftest.py index b8abfa41a6..377370bccf 100644 --- a/test/preview/conftest.py +++ b/test/preview/conftest.py @@ -1,4 +1,4 @@ -from unittest.mock import Mock +from unittest.mock import Mock, 
patch import pytest @@ -11,3 +11,12 @@ def mock_tokenizer(): tokenizer.encode = lambda text: text.split() tokenizer.decode = lambda tokens: " ".join(tokens) return tokenizer + + +@pytest.fixture(autouse=True) +def tenacity_wait(): + """ + Mocks tenacity's wait function to speed up tests. + """ + with patch("tenacity.nap.time"): + yield diff --git a/test/preview/llm_backends/test_chatgpt_backend.py b/test/preview/llm_backends/test_chatgpt_backend.py new file mode 100644 index 0000000000..58a78b6756 --- /dev/null +++ b/test/preview/llm_backends/test_chatgpt_backend.py @@ -0,0 +1,248 @@ +from unittest.mock import patch, Mock + +import pytest + +from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend, default_streaming_callback, ChatMessage + + +class TestChatGPTBackend: + @pytest.mark.unit + def test_init_default(self, caplog): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTBackend() + assert component.api_key is None + assert component.model_name == "gpt-3.5-turbo" + assert component.max_tokens == 500 + assert component.temperature == 0.7 + assert component.top_p == 1 + assert component.n == 1 + assert component.stop == [] + assert component.presence_penalty == 0 + assert component.frequency_penalty == 0 + assert component.logit_bias == {} + assert component.stream is False + assert component.streaming_callback == default_streaming_callback + assert component.api_base_url == "https://api.openai.com/v1" + assert component.openai_organization is None + assert component.max_tokens_limit == 4097 + + tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") + assert caplog.records[0].message == ( + "OpenAI API key is missing. You will need to provide an API key to Pipeline.run()." 
+ ) + + @pytest.mark.unit + def test_init_with_parameters(self, caplog): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + callback = lambda x: x + component = ChatGPTBackend( + api_key="test-api-key", + model_name="gpt-4", + max_tokens=20, + temperature=1, + top_p=5, + n=10, + stop=["test-stop-word"], + presence_penalty=0.5, + frequency_penalty=0.4, + logit_bias={"test-logit-bias": 0.3}, + stream=True, + streaming_callback=callback, + api_base_url="test-base-url", + openai_organization="test-orga-id", + ) + assert component.api_key == "test-api-key" + assert component.model_name == "gpt-4" + assert component.max_tokens == 20 + assert component.temperature == 1 + assert component.top_p == 5 + assert component.n == 10 + assert component.stop == ["test-stop-word"] + assert component.presence_penalty == 0.5 + assert component.frequency_penalty == 0.4 + assert component.logit_bias == {"test-logit-bias": 0.3} + assert component.stream is True + assert component.streaming_callback == callback + assert component.api_base_url == "test-base-url" + assert component.openai_organization == "test-orga-id" + assert component.max_tokens_limit == 8192 + + tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") + assert not caplog.records + + @pytest.mark.unit + def test_init_unknown_tokenizer(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + with pytest.raises(ValueError, match="Tokenizer for model 'test-another-model-name' not found."): + ChatGPTBackend(model_name="test-another-model-name") + + @pytest.mark.unit + def test_init_unknown_token_limit(self, monkeypatch): + monkeypatch.setattr( + "haystack.preview.llm_backends.openai.chatgpt.OPENAI_TOKENIZERS", {"test-model-name": "test-encoding"} + ) + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + with pytest.raises(ValueError, match="Max tokens limit for model 'test-model-name' not found."): + ChatGPTBackend(model_name="test-model-name") + + @pytest.mark.unit + def test_to_dict_default(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTBackend() + data = component.to_dict() + assert data == { + "api_key": None, + "model_name": "gpt-3.5-turbo", + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stop": [], + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": {}, + "stream": False, + # FIXME serialize callback? + "api_base_url": "https://api.openai.com/v1", + "openai_organization": None, + } + + @pytest.mark.unit + def test_to_dict_with_parameters(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + callback = lambda x: x + component = ChatGPTBackend( + api_key="test-api-key", + model_name="gpt-4", + max_tokens=20, + temperature=1, + top_p=5, + n=10, + stop=["test-stop-word"], + presence_penalty=0.5, + frequency_penalty=0.4, + logit_bias={"test-logit-bias": 0.3}, + stream=True, + streaming_callback=callback, + api_base_url="test-base-url", + openai_organization="test-orga-id", + ) + data = component.to_dict() + assert data == { + "api_key": "test-api-key", + "model_name": "gpt-4", + "max_tokens": 20, + "temperature": 1, + "top_p": 5, + "n": 10, + "stop": ["test-stop-word"], + "presence_penalty": 0.5, + "frequency_penalty": 0.4, + "logit_bias": {"test-logit-bias": 0.3}, + "stream": True, + # FIXME serialize callback? 
+ "api_base_url": "test-base-url", + "openai_organization": "test-orga-id", + } + + @pytest.mark.unit + def test_run_no_api_key(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTBackend() + with pytest.raises(ValueError, match="OpenAI API key is missing. Please provide an API key."): + component.complete(chat=[]) + + @pytest.mark.unit + def test_complete(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + with patch("haystack.preview.llm_backends.openai.chatgpt.complete") as complete_patch: + complete_patch.side_effect = lambda payload, **kwargs: ( + [ + f"Response for {payload['messages'][1]['content']}", + f"Another Response for {payload['messages'][1]['content']}", + ], + [{"metadata of": payload["messages"][1]["content"]}], + ) + component = ChatGPTBackend( + api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" + ) + + results = component.complete( + chat=[ + ChatMessage(content="test-prompt-system", role="system"), + ChatMessage(content="test-prompt-user", role="user"), + ] + ) + + assert results == ( + [f"Response for test-prompt-user", f"Another Response for test-prompt-user"], + [{"metadata of": "test-prompt-user"}], + ) + + complete_patch.call_count == 2 + complete_patch.assert_called_once_with( + url="test-base-url/chat/completions", + headers={ + "Authorization": f"Bearer test-api-key", + "Content-Type": "application/json", + "OpenAI-Organization": "test_orga_id", + }, + payload={ + "model": "gpt-3.5-turbo", + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stream": False, + "stop": [], + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": {}, + "messages": [ + {"role": "system", "content": "test-prompt-system"}, + {"role": "user", "content": "test-prompt-user"}, + ], + }, + ) + + @pytest.mark.unit + def test_complete_streaming(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + with patch("haystack.preview.llm_backends.openai.chatgpt.complete_stream") as complete_stream_patch: + complete_stream_patch.side_effect = lambda payload, **kwargs: ( + [f"Response for {payload['messages'][1]['content']}"], + [{"metadata of": payload["messages"][1]["content"]}], + ) + callback = Mock() + component = ChatGPTBackend(api_key="test-api-key", stream=True, streaming_callback=callback) + + results = component.complete( + chat=[ + ChatMessage(content="test-prompt-system", role="system"), + ChatMessage(content="test-prompt-user", role="user"), + ] + ) + + assert results == (["Response for test-prompt-user"], [{"metadata of": "test-prompt-user"}]) + complete_stream_patch.call_count == 2 + complete_stream_patch.assert_any_call( + url="https://api.openai.com/v1/chat/completions", + headers={"Authorization": f"Bearer test-api-key", "Content-Type": "application/json"}, + payload={ + "model": "gpt-3.5-turbo", + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stream": True, + "stop": [], + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": {}, + "messages": [ + {"role": "system", "content": "test-prompt-system"}, + {"role": "user", "content": "test-prompt-user"}, + ], + }, + callback=callback, + ) diff --git a/test/preview/llm_backends/test_openai_helpers.py b/test/preview/llm_backends/test_openai_helpers.py new file mode 100644 index 0000000000..736d7f3dd5 --- /dev/null +++ b/test/preview/llm_backends/test_openai_helpers.py @@ -0,0 +1,252 @@ 
+from unittest.mock import Mock, patch +import json + +import pytest + +from haystack.preview.llm_backends.openai.errors import OpenAIUnauthorizedError, OpenAIError, OpenAIRateLimitError +from haystack.preview.llm_backends.openai._helpers import ( + ChatMessage, + raise_for_status, + check_truncated_answers, + complete, + complete_stream, + enforce_token_limit, + enforce_token_limit_chat, + OPENAI_TIMEOUT, + OPENAI_MAX_RETRIES, +) + + +@pytest.mark.unit +def test_raise_for_status_200(): + response = Mock() + response.status_code = 200 + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_401(): + response = Mock() + response.status_code = 401 + with pytest.raises(OpenAIUnauthorizedError): + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_429(): + response = Mock() + response.status_code = 429 + with pytest.raises(OpenAIRateLimitError): + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_500(): + response = Mock() + response.status_code = 500 + response.text = "Internal Server Error" + with pytest.raises(OpenAIError): + raise_for_status(response) + + +@pytest.mark.unit +def test_check_truncated_answers(caplog): + result = { + "choices": [ + {"finish_reason": "length"}, + {"finish_reason": "content_filter"}, + {"finish_reason": "length"}, + {"finish_reason": "stop"}, + ] + } + payload = {"n": 4} + check_truncated_answers(result, payload) + assert caplog.records[0].message == ( + "2 out of the 4 completions have been truncated before reaching a natural " + "stopping point. Increase the max_tokens parameter to allow for longer completions." + ) + + +@pytest.mark.unit +def test_query_chat_model(): + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + response = Mock() + response.status_code = 200 + response.text = """ + { + "model": "test-model", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": {"content": " Hello, how are you? 
"} + } + ], + "usage": { + "prompt_tokens": 4, + "completion_tokens": 5, + "total_tokens": 9 + } + + }""" + mock_post.return_value = response + replies, metadata = complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) + mock_post.assert_called_once_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + assert replies == ["Hello, how are you?"] + assert metadata == [ + { + "model": "test-model", + "index": 0, + "finish_reason": "stop", + "prompt_tokens": 4, + "completion_tokens": 5, + "total_tokens": 9, + } + ] + + +@pytest.mark.unit +def test_query_chat_model_fail(): + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + response = Mock() + response.status_code = 500 + mock_post.return_value = response + with pytest.raises(OpenAIError): + complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) + mock_post.assert_called_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + mock_post.call_count == OPENAI_MAX_RETRIES + + +def mock_chat_completion_stream(model="test-model", index=0, token="test", finish_reason="stop"): + return Mock( + data=f"""{{ + "model": "{model}", + "choices": [ + {{ + "index": {index}, + "delta": {{"content": "{token}"}}, + "finish_reason": "{finish_reason}" + }} + ] + }}""" + ) + + +@pytest.mark.unit +def test_query_chat_model_stream(): + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + with patch("haystack.preview.llm_backends.openai._helpers.sseclient.SSEClient") as mock_sseclient: + callback = lambda token, event_data: f"|{token}|" + response = Mock() + response.status_code = 200 + + mock_sseclient.return_value.events.return_value = [ + mock_chat_completion_stream(token="Hello"), + mock_chat_completion_stream(token=","), + mock_chat_completion_stream(token=" how"), + mock_chat_completion_stream(token=" are"), + mock_chat_completion_stream(token=" you"), + mock_chat_completion_stream(token="?"), + Mock(data="[DONE]"), + mock_chat_completion_stream(token="discarded tokens"), + ] + + mock_post.return_value = response + replies, metadata = complete_stream( + url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback + ) + mock_post.assert_called_once_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + stream=True, + ) + assert replies == ["|Hello||,|| how|| are|| you||?|"] + assert metadata == [{"model": "test-model", "index": 0, "finish_reason": "stop"}] + + +@pytest.mark.unit +def test_query_chat_model_stream_fail(): + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + callback = Mock() + response = Mock() + response.status_code = 500 + mock_post.return_value = response + with pytest.raises(OpenAIError): + complete_stream( + url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback + ) + mock_post.assert_called_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + mock_post.call_count == OPENAI_MAX_RETRIES + + +@pytest.mark.unit +def test_enforce_token_limit_above_limit(caplog, mock_tokenizer): + prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, 
max_tokens_limit=3) + assert prompt == "This is a" + assert caplog.records[0].message == ( + "The prompt has been truncated from 5 tokens to 3 tokens to fit within the max token " + "limit. Reduce the length of the prompt to prevent it from being cut off." + ) + + +@pytest.mark.unit +def test_enforce_token_limit_below_limit(caplog, mock_tokenizer): + prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=100) + assert prompt == "This is a test prompt." + assert not caplog.records + + +@pytest.mark.unit +def test_enforce_token_limit_chat_above_limit(caplog, mock_tokenizer): + prompts = enforce_token_limit_chat( + [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ], + tokenizer=mock_tokenizer, + max_tokens_limit=7, + tokens_per_message_overhead=2, + ) + assert prompts == [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a", role="user"), + ] + assert caplog.records[0].message == ( + "The chat have been truncated from 11 tokens to 7 tokens to fit within the max token limit. " + "Reduce the length of the chat to prevent it from being cut off." + ) + + +@pytest.mark.unit +def test_enforce_token_limit_chat_below_limit(caplog, mock_tokenizer): + prompts = enforce_token_limit_chat( + [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ], + tokenizer=mock_tokenizer, + max_tokens_limit=100, + tokens_per_message_overhead=2, + ) + assert prompts == [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ] + assert not caplog.records From f59abe8f403d038a0830b65b9f098dc3fb5a83ad Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 15:50:03 +0200 Subject: [PATCH 02/37] fix tests --- .../preview/llm_backends/openai/chatgpt.py | 192 +++++++----------- .../llm_backends/test_chatgpt_backend.py | 126 +++--------- 2 files changed, 103 insertions(+), 215 deletions(-) diff --git a/haystack/preview/llm_backends/openai/chatgpt.py b/haystack/preview/llm_backends/openai/chatgpt.py index d416f6e453..94e3741330 100644 --- a/haystack/preview/llm_backends/openai/chatgpt.py +++ b/haystack/preview/llm_backends/openai/chatgpt.py @@ -6,7 +6,6 @@ from haystack.preview.lazy_imports import LazyImport from haystack.preview.llm_backends.chat_message import ChatMessage from haystack.preview.llm_backends.openai._helpers import ( - default_streaming_callback, complete, complete_stream, enforce_token_limit_chat, @@ -23,6 +22,18 @@ TOKENS_PER_MESSAGE_OVERHEAD = 4 +DEFAULT_OPENAI_PARAMS = { + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stop": [], + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": {}, + "stream": False, + "openai_organization": None, +} class ChatGPTBackend: @@ -39,67 +50,48 @@ def __init__( self, api_key: Optional[str] = None, model_name: str = "gpt-3.5-turbo", - max_tokens: Optional[int] = 500, - temperature: Optional[float] = 0.7, - top_p: Optional[float] = 1, - n: Optional[int] = 1, - stop: Optional[List[str]] = None, - presence_penalty: Optional[float] = 0, - frequency_penalty: Optional[float] = 0, - logit_bias: Optional[Dict[str, float]] = None, - stream: bool = False, - streaming_callback: Optional[Callable] = default_streaming_callback, + model_parameters: Optional[Dict[str, Any]] = None, + streaming_callback: Optional[Callable] = None, api_base_url: str = 
"https://api.openai.com/v1", - openai_organization: Optional[str] = None, ): """ Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. :param api_key: The OpenAI API key. :param model_name: The name or path of the underlying model. - :param max_tokens: The maximum number of tokens the output text can have. - :param temperature: What sampling temperature to use. Higher values means the model will take more risks. - Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model - considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens - comprising the top 10% probability mass are considered. - :param n: How many completions to generate for each prompt. - :param stop: One or more sequences where the API will stop generating further tokens. - :param presence_penalty: What penalty to apply if a token is already present at all. Bigger values mean - the model will be less likely to repeat the same token in the text. - :param frequency_penalty: What penalty to apply if a token has already been generated in the text. - Bigger values mean the model will be less likely to repeat the same token in the text. - :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the - values are the bias to add to that token. - :param stream: If set to True, the API will stream the response. The streaming_callback parameter - is used to process the stream. If set to False, the response will be returned as a string. :param streaming_callback: A callback function that is called when a new token is received from the stream. The callback function should accept two parameters: the token received from the stream and **kwargs. The callback function should return the token to be sent to the stream. If the callback function is not provided, the token is printed to stdout. :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. - :param openai_organization: The OpenAI organization ID. + :param model_parameters: A dictionary of parameters to use for the model. See OpenAI + [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported + parameters: + - `max_tokens`: The maximum number of tokens the output text can have. + - `temperature`: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2, + it will generate two completions for each of the three prompts, ending up with 6 completions in total. + - `stop`: One or more sequences after which the LLM should stop generating tokens. + - `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + - `frequency_penalty`: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. 
+ - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + - `openai_organization`: The OpenAI organization ID. - See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. """ if not api_key: logger.warning("OpenAI API key is missing. You will need to provide an API key to Pipeline.run().") self.api_key = api_key self.model_name = model_name - - self.max_tokens = max_tokens - self.temperature = temperature - self.top_p = top_p - self.n = n - self.stop = stop or [] - self.presence_penalty = presence_penalty - self.frequency_penalty = frequency_penalty - self.logit_bias = logit_bias or {} - self.stream = stream - self.streaming_callback = streaming_callback or default_streaming_callback - - self.openai_organization = openai_organization + self.model_parameters = DEFAULT_OPENAI_PARAMS | (model_parameters or {}) + self.streaming_callback = streaming_callback self.api_base_url = api_base_url tokenizer = None @@ -120,44 +112,14 @@ def __init__( raise ValueError(f"Max tokens limit for model '{model_name}' not found.") self.max_tokens_limit = max_tokens_limit - def to_dict(self) -> Dict[str, Any]: - """ - Serialize to a dictionary. - """ - return { - "api_key": self.api_key, - "model_name": self.model_name, - "max_tokens": self.max_tokens, - "temperature": self.temperature, - "top_p": self.top_p, - "n": self.n, - "stop": self.stop, - "presence_penalty": self.presence_penalty, - "frequency_penalty": self.frequency_penalty, - "logit_bias": self.logit_bias, - "stream": self.stream, - # FIXME how to serialize the streaming callback? - "api_base_url": self.api_base_url, - "openai_organization": self.openai_organization, - } - def complete( self, chat: List[ChatMessage], api_key: Optional[str] = None, model_name: Optional[str] = None, - max_tokens: Optional[int] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - n: Optional[int] = None, - stop: Optional[List[str]] = None, - presence_penalty: Optional[float] = None, - frequency_penalty: Optional[float] = None, - logit_bias: Optional[Dict[str, float]] = None, - api_base_url: Optional[str] = None, - openai_organization: Optional[str] = None, - stream: Optional[bool] = None, + model_parameters: Optional[Dict[str, Any]] = None, streaming_callback: Optional[Callable] = None, + api_base_url: Optional[str] = None, ): """ Queries the LLM with the prompts to produce replies. @@ -165,62 +127,43 @@ def complete( :param chat: The chat to be sent to the generative model. :param api_key: The OpenAI API key. :param model_name: The name or path of the underlying model. - :param max_tokens: The maximum number of tokens the output text can have. - :param temperature: What sampling temperature to use. Higher values means the model will take more risks. - Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - :param top_p: An alternative to sampling with temperature, called nucleus sampling, where the model - considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens - comprising the top 10% probability mass are considered. - :param n: How many completions to generate for each prompt. - :param stop: One or more sequences where the API will stop generating further tokens. - :param presence_penalty: What penalty to apply if a token is already present at all. 
Bigger values mean - the model will be less likely to repeat the same token in the text. - :param frequency_penalty: What penalty to apply if a token has already been generated in the text. - Bigger values mean the model will be less likely to repeat the same token in the text. - :param logit_bias: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the - values are the bias to add to that token. - :param stream: If set to True, the API will stream the response. The streaming_callback parameter - is used to process the stream. If set to False, the response will be returned as a string. :param streaming_callback: A callback function that is called when a new token is received from the stream. The callback function should accept two parameters: the token received from the stream and **kwargs. The callback function should return the token to be sent to the stream. If the callback function is not provided, the token is printed to stdout. :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. - :param openai_organization: The OpenAI organization ID. + :param model_parameters: A dictionary of parameters to use for the model. See OpenAI + [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported + parameters: + - `max_tokens`: The maximum number of tokens the output text can have. + - `temperature`: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2, + it will generate two completions for each of the three prompts, ending up with 6 completions in total. + - `stop`: One or more sequences after which the LLM should stop generating tokens. + - `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + - `frequency_penalty`: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. + - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + - `openai_organization`: The OpenAI organization ID. - See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. """ api_key = api_key if api_key is not None else self.api_key if not api_key: raise ValueError("OpenAI API key is missing. 
Please provide an API key.") - model_name = model_name if model_name is not None else self.model_name - max_tokens = max_tokens if max_tokens is not None else self.max_tokens - temperature = temperature if temperature is not None else self.temperature - top_p = top_p if top_p is not None else self.top_p - n = n if n is not None else self.n - stop = stop if stop is not None else self.stop - presence_penalty = presence_penalty if presence_penalty is not None else self.presence_penalty - frequency_penalty = frequency_penalty if frequency_penalty is not None else self.frequency_penalty - logit_bias = logit_bias if logit_bias is not None else self.logit_bias - stream = stream if stream is not None else self.stream - streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback + model_name = model_name or self.model_name + model_parameters = self.model_parameters | (model_parameters or {}) + streaming_callback = streaming_callback or self.streaming_callback api_base_url = api_base_url or self.api_base_url - openai_organization = openai_organization if openai_organization is not None else self.openai_organization - parameters = { - "model": model_name, - "max_tokens": max_tokens, - "temperature": temperature, - "top_p": top_p, - "n": n, - "stream": stream, - "stop": stop, - "presence_penalty": presence_penalty, - "frequency_penalty": frequency_penalty, - "logit_bias": logit_bias, - } + openai_organization = model_parameters.pop("openai_organization", None) headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if openai_organization: headers["OpenAI-Organization"] = openai_organization @@ -232,8 +175,15 @@ def complete( max_tokens_limit=self.max_tokens_limit, tokens_per_message_overhead=TOKENS_PER_MESSAGE_OVERHEAD, ) - payload = {**parameters, "messages": [asdict(message) for message in chat]} - if stream: + + print(model_parameters) + + payload = { + "model": model_name, + **model_parameters, + "stream": streaming_callback is not None, + "messages": [asdict(message) for message in chat], + } + if streaming_callback: return complete_stream(url=url, headers=headers, payload=payload, callback=streaming_callback) - else: - return complete(url=url, headers=headers, payload=payload) + return complete(url=url, headers=headers, payload=payload) diff --git a/test/preview/llm_backends/test_chatgpt_backend.py b/test/preview/llm_backends/test_chatgpt_backend.py index 58a78b6756..b00f287ef3 100644 --- a/test/preview/llm_backends/test_chatgpt_backend.py +++ b/test/preview/llm_backends/test_chatgpt_backend.py @@ -2,7 +2,7 @@ import pytest -from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend, default_streaming_callback, ChatMessage +from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend, ChatMessage class TestChatGPTBackend: @@ -12,18 +12,20 @@ def test_init_default(self, caplog): component = ChatGPTBackend() assert component.api_key is None assert component.model_name == "gpt-3.5-turbo" - assert component.max_tokens == 500 - assert component.temperature == 0.7 - assert component.top_p == 1 - assert component.n == 1 - assert component.stop == [] - assert component.presence_penalty == 0 - assert component.frequency_penalty == 0 - assert component.logit_bias == {} - assert component.stream is False - assert component.streaming_callback == default_streaming_callback + assert component.model_parameters == { + "max_tokens": 500, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stop": [], + "presence_penalty": 
0, + "frequency_penalty": 0, + "logit_bias": {}, + "stream": False, + "openai_organization": None, + } + assert component.streaming_callback is None assert component.api_base_url == "https://api.openai.com/v1" - assert component.openai_organization is None assert component.max_tokens_limit == 4097 tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") @@ -38,33 +40,27 @@ def test_init_with_parameters(self, caplog): component = ChatGPTBackend( api_key="test-api-key", model_name="gpt-4", - max_tokens=20, - temperature=1, - top_p=5, - n=10, - stop=["test-stop-word"], - presence_penalty=0.5, - frequency_penalty=0.4, - logit_bias={"test-logit-bias": 0.3}, - stream=True, + model_parameters={"max_tokens": 100, "extra-param": "value"}, streaming_callback=callback, api_base_url="test-base-url", - openai_organization="test-orga-id", ) assert component.api_key == "test-api-key" assert component.model_name == "gpt-4" - assert component.max_tokens == 20 - assert component.temperature == 1 - assert component.top_p == 5 - assert component.n == 10 - assert component.stop == ["test-stop-word"] - assert component.presence_penalty == 0.5 - assert component.frequency_penalty == 0.4 - assert component.logit_bias == {"test-logit-bias": 0.3} - assert component.stream is True + assert component.model_parameters == { + "max_tokens": 100, + "temperature": 0.7, + "top_p": 1, + "n": 1, + "stop": [], + "presence_penalty": 0, + "frequency_penalty": 0, + "logit_bias": {}, + "stream": False, + "openai_organization": None, + "extra-param": "value", + } assert component.streaming_callback == callback assert component.api_base_url == "test-base-url" - assert component.openai_organization == "test-orga-id" assert component.max_tokens_limit == 8192 tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") @@ -85,66 +81,6 @@ def test_init_unknown_token_limit(self, monkeypatch): with pytest.raises(ValueError, match="Max tokens limit for model 'test-model-name' not found."): ChatGPTBackend(model_name="test-model-name") - @pytest.mark.unit - def test_to_dict_default(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTBackend() - data = component.to_dict() - assert data == { - "api_key": None, - "model_name": "gpt-3.5-turbo", - "max_tokens": 500, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stop": [], - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": {}, - "stream": False, - # FIXME serialize callback? - "api_base_url": "https://api.openai.com/v1", - "openai_organization": None, - } - - @pytest.mark.unit - def test_to_dict_with_parameters(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - callback = lambda x: x - component = ChatGPTBackend( - api_key="test-api-key", - model_name="gpt-4", - max_tokens=20, - temperature=1, - top_p=5, - n=10, - stop=["test-stop-word"], - presence_penalty=0.5, - frequency_penalty=0.4, - logit_bias={"test-logit-bias": 0.3}, - stream=True, - streaming_callback=callback, - api_base_url="test-base-url", - openai_organization="test-orga-id", - ) - data = component.to_dict() - assert data == { - "api_key": "test-api-key", - "model_name": "gpt-4", - "max_tokens": 20, - "temperature": 1, - "top_p": 5, - "n": 10, - "stop": ["test-stop-word"], - "presence_penalty": 0.5, - "frequency_penalty": 0.4, - "logit_bias": {"test-logit-bias": 0.3}, - "stream": True, - # FIXME serialize callback? 
- "api_base_url": "test-base-url", - "openai_organization": "test-orga-id", - } - @pytest.mark.unit def test_run_no_api_key(self): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: @@ -164,7 +100,9 @@ def test_complete(self): [{"metadata of": payload["messages"][1]["content"]}], ) component = ChatGPTBackend( - api_key="test-api-key", openai_organization="test_orga_id", api_base_url="test-base-url" + api_key="test-api-key", + model_parameters={"openai_organization": "test_orga_id"}, + api_base_url="test-base-url", ) results = component.complete( @@ -214,7 +152,7 @@ def test_complete_streaming(self): [{"metadata of": payload["messages"][1]["content"]}], ) callback = Mock() - component = ChatGPTBackend(api_key="test-api-key", stream=True, streaming_callback=callback) + component = ChatGPTBackend(api_key="test-api-key", streaming_callback=callback) results = component.complete( chat=[ From 5f70a6514c8684bf4b93490ad60215774c3599e3 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 16:01:50 +0200 Subject: [PATCH 03/37] reno --- releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml diff --git a/releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml b/releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml new file mode 100644 index 0000000000..31334d5c8d --- /dev/null +++ b/releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml @@ -0,0 +1,2 @@ +preview: + - Introduce `ChatGPTBackend`, a class that will be used by LLM components to talk to OpenAI Chat models like ChatGPT and GPT4. Note that ChatGPTBackend itself is NOT a component. From ffb1a8f121e3b3fb2e6c3ce2c7d51eb834ff1f50 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 16:03:21 +0200 Subject: [PATCH 04/37] remove print --- haystack/preview/llm_backends/openai/_helpers.py | 1 - haystack/preview/llm_backends/openai/chatgpt.py | 3 --- 2 files changed, 4 deletions(-) diff --git a/haystack/preview/llm_backends/openai/_helpers.py b/haystack/preview/llm_backends/openai/_helpers.py index 1b446e319b..431fd72ef6 100644 --- a/haystack/preview/llm_backends/openai/_helpers.py +++ b/haystack/preview/llm_backends/openai/_helpers.py @@ -204,7 +204,6 @@ def enforce_token_limit_chat( :param tokens_per_message_overhead: The number of tokens that are added to the prompt text for each message. :return: A chat that fits within the max tokens limit of the model. 
""" - print(chat) messages_len = [len(tokenizer.encode(message.content)) + tokens_per_message_overhead for message in chat] if (total_chat_length := sum(messages_len)) <= max_tokens_limit: return chat diff --git a/haystack/preview/llm_backends/openai/chatgpt.py b/haystack/preview/llm_backends/openai/chatgpt.py index 94e3741330..34e2e4211c 100644 --- a/haystack/preview/llm_backends/openai/chatgpt.py +++ b/haystack/preview/llm_backends/openai/chatgpt.py @@ -175,9 +175,6 @@ def complete( max_tokens_limit=self.max_tokens_limit, tokens_per_message_overhead=TOKENS_PER_MESSAGE_OVERHEAD, ) - - print(model_parameters) - payload = { "model": model_name, **model_parameters, From 853f29d992cff2039b083dfa6fc35bd2c3fc6cfa Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 16:21:48 +0200 Subject: [PATCH 05/37] helpers tests --- .../generators/openai/test_openai_helpers.py | 234 +++++++++++++++++- 1 file changed, 233 insertions(+), 1 deletion(-) diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py index 23a66117d1..736d7f3dd5 100644 --- a/test/preview/components/generators/openai/test_openai_helpers.py +++ b/test/preview/components/generators/openai/test_openai_helpers.py @@ -1,6 +1,199 @@ +from unittest.mock import Mock, patch +import json + import pytest -from haystack.preview.components.generators.openai._helpers import enforce_token_limit +from haystack.preview.llm_backends.openai.errors import OpenAIUnauthorizedError, OpenAIError, OpenAIRateLimitError +from haystack.preview.llm_backends.openai._helpers import ( + ChatMessage, + raise_for_status, + check_truncated_answers, + complete, + complete_stream, + enforce_token_limit, + enforce_token_limit_chat, + OPENAI_TIMEOUT, + OPENAI_MAX_RETRIES, +) + + +@pytest.mark.unit +def test_raise_for_status_200(): + response = Mock() + response.status_code = 200 + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_401(): + response = Mock() + response.status_code = 401 + with pytest.raises(OpenAIUnauthorizedError): + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_429(): + response = Mock() + response.status_code = 429 + with pytest.raises(OpenAIRateLimitError): + raise_for_status(response) + + +@pytest.mark.unit +def test_raise_for_status_500(): + response = Mock() + response.status_code = 500 + response.text = "Internal Server Error" + with pytest.raises(OpenAIError): + raise_for_status(response) + + +@pytest.mark.unit +def test_check_truncated_answers(caplog): + result = { + "choices": [ + {"finish_reason": "length"}, + {"finish_reason": "content_filter"}, + {"finish_reason": "length"}, + {"finish_reason": "stop"}, + ] + } + payload = {"n": 4} + check_truncated_answers(result, payload) + assert caplog.records[0].message == ( + "2 out of the 4 completions have been truncated before reaching a natural " + "stopping point. Increase the max_tokens parameter to allow for longer completions." + ) + + +@pytest.mark.unit +def test_query_chat_model(): + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + response = Mock() + response.status_code = 200 + response.text = """ + { + "model": "test-model", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": {"content": " Hello, how are you? 
"} + } + ], + "usage": { + "prompt_tokens": 4, + "completion_tokens": 5, + "total_tokens": 9 + } + + }""" + mock_post.return_value = response + replies, metadata = complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) + mock_post.assert_called_once_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + assert replies == ["Hello, how are you?"] + assert metadata == [ + { + "model": "test-model", + "index": 0, + "finish_reason": "stop", + "prompt_tokens": 4, + "completion_tokens": 5, + "total_tokens": 9, + } + ] + + +@pytest.mark.unit +def test_query_chat_model_fail(): + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + response = Mock() + response.status_code = 500 + mock_post.return_value = response + with pytest.raises(OpenAIError): + complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) + mock_post.assert_called_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + mock_post.call_count == OPENAI_MAX_RETRIES + + +def mock_chat_completion_stream(model="test-model", index=0, token="test", finish_reason="stop"): + return Mock( + data=f"""{{ + "model": "{model}", + "choices": [ + {{ + "index": {index}, + "delta": {{"content": "{token}"}}, + "finish_reason": "{finish_reason}" + }} + ] + }}""" + ) + + +@pytest.mark.unit +def test_query_chat_model_stream(): + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + with patch("haystack.preview.llm_backends.openai._helpers.sseclient.SSEClient") as mock_sseclient: + callback = lambda token, event_data: f"|{token}|" + response = Mock() + response.status_code = 200 + + mock_sseclient.return_value.events.return_value = [ + mock_chat_completion_stream(token="Hello"), + mock_chat_completion_stream(token=","), + mock_chat_completion_stream(token=" how"), + mock_chat_completion_stream(token=" are"), + mock_chat_completion_stream(token=" you"), + mock_chat_completion_stream(token="?"), + Mock(data="[DONE]"), + mock_chat_completion_stream(token="discarded tokens"), + ] + + mock_post.return_value = response + replies, metadata = complete_stream( + url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback + ) + mock_post.assert_called_once_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + stream=True, + ) + assert replies == ["|Hello||,|| how|| are|| you||?|"] + assert metadata == [{"model": "test-model", "index": 0, "finish_reason": "stop"}] + + +@pytest.mark.unit +def test_query_chat_model_stream_fail(): + with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: + callback = Mock() + response = Mock() + response.status_code = 500 + mock_post.return_value = response + with pytest.raises(OpenAIError): + complete_stream( + url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback + ) + mock_post.assert_called_with( + "test-url", + headers={"header": "test-header"}, + data=json.dumps({"param": "test-param"}), + timeout=OPENAI_TIMEOUT, + ) + mock_post.call_count == OPENAI_MAX_RETRIES @pytest.mark.unit @@ -18,3 +211,42 @@ def test_enforce_token_limit_below_limit(caplog, mock_tokenizer): prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, 
max_tokens_limit=100) assert prompt == "This is a test prompt." assert not caplog.records + + +@pytest.mark.unit +def test_enforce_token_limit_chat_above_limit(caplog, mock_tokenizer): + prompts = enforce_token_limit_chat( + [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ], + tokenizer=mock_tokenizer, + max_tokens_limit=7, + tokens_per_message_overhead=2, + ) + assert prompts == [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a", role="user"), + ] + assert caplog.records[0].message == ( + "The chat have been truncated from 11 tokens to 7 tokens to fit within the max token limit. " + "Reduce the length of the chat to prevent it from being cut off." + ) + + +@pytest.mark.unit +def test_enforce_token_limit_chat_below_limit(caplog, mock_tokenizer): + prompts = enforce_token_limit_chat( + [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ], + tokenizer=mock_tokenizer, + max_tokens_limit=100, + tokens_per_message_overhead=2, + ) + assert prompts == [ + ChatMessage(content="System Prompt", role="system"), + ChatMessage(content="This is a test prompt.", role="user"), + ] + assert not caplog.records From f0c5a8dc669617be56027ae6fb6bd13ef7b036da Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 16:51:32 +0200 Subject: [PATCH 06/37] add chatgpt generator --- .../components/test_chatgpt_generator.py | 64 +++++ .../components/generators/openai/_helpers.py | 33 --- .../components/generators/openai/chatgpt.py | 201 ++++++++++++++ .../preview/llm_backends/openai/_helpers.py | 9 - .../preview/llm_backends/openai/chatgpt.py | 4 +- .../openai/test_chatgpt_generator.py | 149 +++++++++++ .../generators/openai/test_openai_helpers.py | 252 ------------------ 7 files changed, 416 insertions(+), 296 deletions(-) create mode 100644 e2e/preview/components/test_chatgpt_generator.py delete mode 100644 haystack/preview/components/generators/openai/_helpers.py create mode 100644 haystack/preview/components/generators/openai/chatgpt.py create mode 100644 test/preview/components/generators/openai/test_chatgpt_generator.py delete mode 100644 test/preview/components/generators/openai/test_openai_helpers.py diff --git a/e2e/preview/components/test_chatgpt_generator.py b/e2e/preview/components/test_chatgpt_generator.py new file mode 100644 index 0000000000..c3fad4038d --- /dev/null +++ b/e2e/preview/components/test_chatgpt_generator.py @@ -0,0 +1,64 @@ +import os +import pytest +from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator + + +@pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", +) +def test_chatgpt_generator_run(): + component = ChatGPTGenerator(api_key=os.environ.get("OPENAI_API_KEY")) + results = component.run( + prompts=["What's the capital of France?", "What's the capital of Germany?"], model_parameters={"n": 1} + ) + + assert len(results["replies"]) == 2 + assert len(results["replies"][0]) == 1 + assert "Paris" in results["replies"][0][0] + assert len(results["replies"][1]) == 1 + assert "Berlin" in results["replies"][1][0] + + assert len(results["metadata"]) == 2 + assert len(results["metadata"][0]) == 1 + assert "gpt-3.5-turbo" in results["metadata"][0][0]["model"] + assert "stop" == results["metadata"][0][0]["finish_reason"] + assert len(results["metadata"][1]) == 1 + assert 
"gpt-3.5-turbo" in results["metadata"][1][0]["model"] + assert "stop" == results["metadata"][1][0]["finish_reason"] + + +@pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", +) +def test_chatgpt_generator_run_streaming(): + class Callback: + def __init__(self): + self.responses = "" + + def __call__(self, token, event_data): + self.responses += token + return token + + callback = Callback() + component = ChatGPTGenerator(os.environ.get("OPENAI_API_KEY"), streaming_callback=callback) + results = component.run( + prompts=["What's the capital of France?", "What's the capital of Germany?"], model_parameters={"n": 1} + ) + + assert len(results["replies"]) == 2 + assert len(results["replies"][0]) == 1 + assert "Paris" in results["replies"][0][0] + assert len(results["replies"][1]) == 1 + assert "Berlin" in results["replies"][1][0] + + assert callback.responses == results["replies"][0][0] + results["replies"][1][0] + + assert len(results["metadata"]) == 2 + assert len(results["metadata"][0]) == 1 + assert "gpt-3.5-turbo" in results["metadata"][0][0]["model"] + assert "stop" == results["metadata"][0][0]["finish_reason"] + assert len(results["metadata"][1]) == 1 + assert "gpt-3.5-turbo" in results["metadata"][1][0]["model"] + assert "stop" == results["metadata"][1][0]["finish_reason"] diff --git a/haystack/preview/components/generators/openai/_helpers.py b/haystack/preview/components/generators/openai/_helpers.py deleted file mode 100644 index 946901b644..0000000000 --- a/haystack/preview/components/generators/openai/_helpers.py +++ /dev/null @@ -1,33 +0,0 @@ -import logging - -from haystack.preview.lazy_imports import LazyImport - -with LazyImport("Run 'pip install tiktoken'") as tiktoken_import: - import tiktoken - - -logger = logging.getLogger(__name__) - - -def enforce_token_limit(prompt: str, tokenizer: "tiktoken.Encoding", max_tokens_limit: int) -> str: - """ - Ensure that the length of the prompt is within the max tokens limit of the model. - If needed, truncate the prompt text so that it fits within the limit. - - :param prompt: Prompt text to be sent to the generative model. - :param tokenizer: The tokenizer used to encode the prompt. - :param max_tokens_limit: The max tokens limit of the model. - :return: The prompt text that fits within the max tokens limit of the model. - """ - tiktoken_import.check() - tokens = tokenizer.encode(prompt) - tokens_count = len(tokens) - if tokens_count > max_tokens_limit: - logger.warning( - "The prompt has been truncated from %s tokens to %s tokens to fit within the max token limit. 
" - "Reduce the length of the prompt to prevent it from being cut off.", - tokens_count, - max_tokens_limit, - ) - prompt = tokenizer.decode(tokens[:max_tokens_limit]) - return prompt diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py new file mode 100644 index 0000000000..fcb9047d75 --- /dev/null +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -0,0 +1,201 @@ +from typing import Optional, List, Callable, Dict, Any + +import sys +import builtins +import logging + +from haystack.preview import component, default_from_dict, default_to_dict, DeserializationError +from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend +from haystack.preview.llm_backends.chat_message import ChatMessage + + +logger = logging.getLogger(__name__) + + +TOKENS_PER_MESSAGE_OVERHEAD = 4 + + +def default_streaming_callback(token: str, **kwargs): + """ + Default callback function for streaming responses from OpenAI API. + Prints the tokens to stdout as soon as they are received and returns them. + """ + print(token, flush=True, end="") + return token + + +@component +class ChatGPTGenerator: + """ + ChatGPT LLM Generator. + + Queries ChatGPT using OpenAI's GPT-3 ChatGPT API. Invocations are made using REST API. + See [OpenAI ChatGPT API](https://platform.openai.com/docs/guides/chat) for more details. + """ + + # TODO support function calling! + + def __init__( + self, + api_key: Optional[str] = None, + model_name: str = "gpt-3.5-turbo", + system_prompt: Optional[str] = None, + model_parameters: Optional[Dict[str, Any]] = None, + streaming_callback: Optional[Callable] = None, + api_base_url: str = "https://api.openai.com/v1", + ): + """ + Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. + + :param api_key: The OpenAI API key. + :param model_name: The name of the model to use. + :param system_prompt: The prompt to be prepended to the user prompt. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token to be sent to the stream. If the callback function is not + provided, the token is printed to stdout. + :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. + :param model_parameters: A dictionary of parameters to use for the model. See OpenAI + [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported + parameters: + - `max_tokens`: The maximum number of tokens the output text can have. + - `temperature`: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2, + it will generate two completions for each of the three prompts, ending up with 6 completions in total. + - `stop`: One or more sequences after which the LLM should stop generating tokens. 
+ - `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + - `frequency_penalty`: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. + - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + - `openai_organization`: The OpenAI organization ID. + """ + self.llm = ChatGPTBackend( + api_key=api_key, + model_name=model_name, + model_parameters=model_parameters, + streaming_callback=streaming_callback, + api_base_url=api_base_url, + ) + self.system_prompt = system_prompt + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize this component to a dictionary. + """ + if self.llm.streaming_callback: + module = sys.modules.get(self.llm.streaming_callback.__module__) + if not module: + raise ValueError("Could not locate the import module.") + if module == builtins: + callback_name = self.llm.streaming_callback.__name__ + else: + callback_name = f"{module.__name__}.{self.llm.streaming_callback.__name__}" + else: + callback_name = None + + return default_to_dict( + self, + api_key=self.llm.api_key, + model_name=self.llm.model_name, + model_parameters=self.llm.model_parameters, + system_prompt=self.system_prompt, + streaming_callback=callback_name, + api_base_url=self.llm.api_base_url, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "ChatGPTGenerator": + """ + Deserialize this component from a dictionary. + """ + init_params = data.get("init_parameters", {}) + streaming_callback = None + if "streaming_callback" in init_params: + parts = init_params["streaming_callback"].split(".") + module_name = ".".join(parts[:-1]) + function_name = parts[-1] + module = sys.modules.get(module_name, None) + if not module: + raise DeserializationError(f"Could not locate the module of the streaming callback: {module_name}") + streaming_callback = getattr(module, function_name, None) + if not streaming_callback: + raise DeserializationError(f"Could not locate the streaming callback: {function_name}") + data["init_parameters"]["streaming_callback"] = streaming_callback + return default_from_dict(cls, data) + + @component.output_types(replies=List[List[str]], metadata=List[Dict[str, Any]]) + def run( + self, + prompts: List[str], + api_key: Optional[str] = None, + model_name: str = "gpt-3.5-turbo", + system_prompt: Optional[str] = None, + model_parameters: Optional[Dict[str, Any]] = None, + streaming_callback: Optional[Callable] = None, + api_base_url: str = "https://api.openai.com/v1", + ): + """ + Queries the LLM with the prompts to produce replies. + + :param prompts: The prompts to be sent to the generative model. + :param api_key: The OpenAI API key. + :param model_name: The name of the model to use. + :param system_prompt: The prompt to be prepended to the user prompt. + :param streaming_callback: A callback function that is called when a new token is received from the stream. + The callback function should accept two parameters: the token received from the stream and **kwargs. + The callback function should return the token to be sent to the stream. If the callback function is not + provided, the token is printed to stdout. + :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. 
+ :param model_parameters: A dictionary of parameters to use for the model. See OpenAI + [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported + parameters: + - `max_tokens`: The maximum number of tokens the output text can have. + - `temperature`: What sampling temperature to use. Higher values means the model will take more risks. + Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. + - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model + considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens + comprising the top 10% probability mass are considered. + - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2, + it will generate two completions for each of the three prompts, ending up with 6 completions in total. + - `stop`: One or more sequences after which the LLM should stop generating tokens. + - `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean + the model will be less likely to repeat the same token in the text. + - `frequency_penalty`: What penalty to apply if a token has already been generated in the text. + Bigger values mean the model will be less likely to repeat the same token in the text. + - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the + values are the bias to add to that token. + - `openai_organization`: The OpenAI organization ID. + + See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. + """ + system_prompt = system_prompt if system_prompt is not None else self.system_prompt + if system_prompt: + system_message = ChatMessage(content=system_prompt, role="system") + chats = [] + for prompt in prompts: + message = ChatMessage(content=prompt, role="user") + if system_prompt: + chats.append([system_message, message]) + else: + chats.append([message]) + + replies, metadata = [], [] + for chat in chats: + reply, meta = self.llm.complete( + chat=chat, + api_key=api_key, + model_name=model_name, + model_parameters=model_parameters, + streaming_callback=streaming_callback, + api_base_url=api_base_url, + ) + replies.append(reply) + metadata.append(meta) + + return {"replies": replies, "metadata": metadata} diff --git a/haystack/preview/llm_backends/openai/_helpers.py b/haystack/preview/llm_backends/openai/_helpers.py index 431fd72ef6..f87611d4fc 100644 --- a/haystack/preview/llm_backends/openai/_helpers.py +++ b/haystack/preview/llm_backends/openai/_helpers.py @@ -47,15 +47,6 @@ ) -def default_streaming_callback(token: str, **kwargs): - """ - Default callback function for streaming responses from OpenAI API. - Prints the tokens to stdout as soon as they are received and returns them. - """ - print(token, flush=True, end="") - return token - - @openai_retry def complete(url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Tuple[List[str], List[Dict[str, Any]]]: """ diff --git a/haystack/preview/llm_backends/openai/chatgpt.py b/haystack/preview/llm_backends/openai/chatgpt.py index 34e2e4211c..6b00f090f0 100644 --- a/haystack/preview/llm_backends/openai/chatgpt.py +++ b/haystack/preview/llm_backends/openai/chatgpt.py @@ -58,7 +58,7 @@ def __init__( Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. :param api_key: The OpenAI API key. 
- :param model_name: The name or path of the underlying model. + :param model_name: The name of the model to use. :param streaming_callback: A callback function that is called when a new token is received from the stream. The callback function should accept two parameters: the token received from the stream and **kwargs. The callback function should return the token to be sent to the stream. If the callback function is not @@ -126,7 +126,7 @@ def complete( :param chat: The chat to be sent to the generative model. :param api_key: The OpenAI API key. - :param model_name: The name or path of the underlying model. + :param model_name: The name of the model to use. :param streaming_callback: A callback function that is called when a new token is received from the stream. The callback function should accept two parameters: the token received from the stream and **kwargs. The callback function should return the token to be sent to the stream. If the callback function is not diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py new file mode 100644 index 0000000000..944dd84d5e --- /dev/null +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -0,0 +1,149 @@ +from unittest.mock import patch + +import pytest + +from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator +from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback +from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend, DEFAULT_OPENAI_PARAMS + + +class TestChatGPTGenerator: + @pytest.mark.unit + def test_init_default(self, caplog): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + assert component.system_prompt is None + assert component.llm.api_key is None + assert component.llm.model_name == "gpt-3.5-turbo" + assert component.llm.streaming_callback is None + assert component.llm.api_base_url == "https://api.openai.com/v1" + assert component.llm.model_parameters == DEFAULT_OPENAI_PARAMS + assert isinstance(component.llm, ChatGPTBackend) + + @pytest.mark.unit + def test_init_with_parameters(self, caplog): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + callback = lambda x: x + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="gpt-4", + system_prompt="test-system-prompt", + model_parameters={"max_tokens": 10, "some-test-param": "test-params"}, + streaming_callback=callback, + api_base_url="test-base-url", + ) + assert component.system_prompt == "test-system-prompt" + assert component.llm.api_key == "test-api-key" + assert component.llm.model_name == "gpt-4" + assert component.llm.streaming_callback == callback + assert component.llm.api_base_url == "test-base-url" + assert component.llm.model_parameters == { + **DEFAULT_OPENAI_PARAMS, + "max_tokens": 10, + "some-test-param": "test-params", + } + + @pytest.mark.unit + def test_to_dict_default(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": None, + "model_name": "gpt-3.5-turbo", + "system_prompt": None, + "model_parameters": DEFAULT_OPENAI_PARAMS, + "streaming_callback": None, + "api_base_url": "https://api.openai.com/v1", + }, + } + + @pytest.mark.unit + def 
test_to_dict_with_parameters(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="gpt-4", + system_prompt="test-system-prompt", + model_parameters={"max_tokens": 10, "some-test-params": "test-params"}, + streaming_callback=default_streaming_callback, + api_base_url="test-base-url", + ) + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "gpt-4", + "system_prompt": "test-system-prompt", + "model_parameters": {**DEFAULT_OPENAI_PARAMS, "max_tokens": 10, "some-test-params": "test-params"}, + "api_base_url": "test-base-url", + "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", + }, + } + + @pytest.mark.unit + def test_from_dict(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + data = { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "gpt-4", + "system_prompt": "test-system-prompt", + "model_parameters": {"max_tokens": 10, "some-test-params": "test-params"}, + "api_base_url": "test-base-url", + "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", + }, + } + component = ChatGPTGenerator.from_dict(data) + assert component.system_prompt == "test-system-prompt" + assert component.llm.api_key == "test-api-key" + assert component.llm.model_name == "gpt-4" + assert component.llm.streaming_callback == default_streaming_callback + assert component.llm.api_base_url == "test-base-url" + assert component.llm.model_parameters == { + **DEFAULT_OPENAI_PARAMS, + "max_tokens": 10, + "some-test-params": "test-params", + } + + @pytest.mark.unit + def test_run_no_api_key(self): + with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: + component = ChatGPTGenerator() + with pytest.raises(ValueError, match="OpenAI API key is missing. 
Please provide an API key."): + component.run(prompts=["test"]) + + @pytest.mark.unit + def test_run_no_system_prompt(self): + with patch("haystack.preview.components.generators.openai.chatgpt.ChatGPTBackend") as chatgpt_patch: + chatgpt_patch.return_value.complete.side_effect = lambda chat, **kwargs: ( + [f"{msg.role}: {msg.content}" for msg in chat], + {"some_info": None}, + ) + component = ChatGPTGenerator(api_key="test-api-key") + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert results == { + "replies": [["user: test-prompt-1"], ["user: test-prompt-2"]], + "metadata": [{"some_info": None}, {"some_info": None}], + } + + @pytest.mark.unit + def test_run_with_system_prompt(self): + with patch("haystack.preview.components.generators.openai.chatgpt.ChatGPTBackend") as chatgpt_patch: + chatgpt_patch.return_value.complete.side_effect = lambda chat, **kwargs: ( + [f"{msg.role}: {msg.content}" for msg in chat], + {"some_info": None}, + ) + component = ChatGPTGenerator(api_key="test-api-key", system_prompt="test-system-prompt") + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert results == { + "replies": [ + ["system: test-system-prompt", "user: test-prompt-1"], + ["system: test-system-prompt", "user: test-prompt-2"], + ], + "metadata": [{"some_info": None}, {"some_info": None}], + } diff --git a/test/preview/components/generators/openai/test_openai_helpers.py b/test/preview/components/generators/openai/test_openai_helpers.py deleted file mode 100644 index 736d7f3dd5..0000000000 --- a/test/preview/components/generators/openai/test_openai_helpers.py +++ /dev/null @@ -1,252 +0,0 @@ -from unittest.mock import Mock, patch -import json - -import pytest - -from haystack.preview.llm_backends.openai.errors import OpenAIUnauthorizedError, OpenAIError, OpenAIRateLimitError -from haystack.preview.llm_backends.openai._helpers import ( - ChatMessage, - raise_for_status, - check_truncated_answers, - complete, - complete_stream, - enforce_token_limit, - enforce_token_limit_chat, - OPENAI_TIMEOUT, - OPENAI_MAX_RETRIES, -) - - -@pytest.mark.unit -def test_raise_for_status_200(): - response = Mock() - response.status_code = 200 - raise_for_status(response) - - -@pytest.mark.unit -def test_raise_for_status_401(): - response = Mock() - response.status_code = 401 - with pytest.raises(OpenAIUnauthorizedError): - raise_for_status(response) - - -@pytest.mark.unit -def test_raise_for_status_429(): - response = Mock() - response.status_code = 429 - with pytest.raises(OpenAIRateLimitError): - raise_for_status(response) - - -@pytest.mark.unit -def test_raise_for_status_500(): - response = Mock() - response.status_code = 500 - response.text = "Internal Server Error" - with pytest.raises(OpenAIError): - raise_for_status(response) - - -@pytest.mark.unit -def test_check_truncated_answers(caplog): - result = { - "choices": [ - {"finish_reason": "length"}, - {"finish_reason": "content_filter"}, - {"finish_reason": "length"}, - {"finish_reason": "stop"}, - ] - } - payload = {"n": 4} - check_truncated_answers(result, payload) - assert caplog.records[0].message == ( - "2 out of the 4 completions have been truncated before reaching a natural " - "stopping point. Increase the max_tokens parameter to allow for longer completions." 
- ) - - -@pytest.mark.unit -def test_query_chat_model(): - with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: - response = Mock() - response.status_code = 200 - response.text = """ - { - "model": "test-model", - "choices": [ - { - "index": 0, - "finish_reason": "stop", - "message": {"content": " Hello, how are you? "} - } - ], - "usage": { - "prompt_tokens": 4, - "completion_tokens": 5, - "total_tokens": 9 - } - - }""" - mock_post.return_value = response - replies, metadata = complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) - mock_post.assert_called_once_with( - "test-url", - headers={"header": "test-header"}, - data=json.dumps({"param": "test-param"}), - timeout=OPENAI_TIMEOUT, - ) - assert replies == ["Hello, how are you?"] - assert metadata == [ - { - "model": "test-model", - "index": 0, - "finish_reason": "stop", - "prompt_tokens": 4, - "completion_tokens": 5, - "total_tokens": 9, - } - ] - - -@pytest.mark.unit -def test_query_chat_model_fail(): - with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: - response = Mock() - response.status_code = 500 - mock_post.return_value = response - with pytest.raises(OpenAIError): - complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) - mock_post.assert_called_with( - "test-url", - headers={"header": "test-header"}, - data=json.dumps({"param": "test-param"}), - timeout=OPENAI_TIMEOUT, - ) - mock_post.call_count == OPENAI_MAX_RETRIES - - -def mock_chat_completion_stream(model="test-model", index=0, token="test", finish_reason="stop"): - return Mock( - data=f"""{{ - "model": "{model}", - "choices": [ - {{ - "index": {index}, - "delta": {{"content": "{token}"}}, - "finish_reason": "{finish_reason}" - }} - ] - }}""" - ) - - -@pytest.mark.unit -def test_query_chat_model_stream(): - with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: - with patch("haystack.preview.llm_backends.openai._helpers.sseclient.SSEClient") as mock_sseclient: - callback = lambda token, event_data: f"|{token}|" - response = Mock() - response.status_code = 200 - - mock_sseclient.return_value.events.return_value = [ - mock_chat_completion_stream(token="Hello"), - mock_chat_completion_stream(token=","), - mock_chat_completion_stream(token=" how"), - mock_chat_completion_stream(token=" are"), - mock_chat_completion_stream(token=" you"), - mock_chat_completion_stream(token="?"), - Mock(data="[DONE]"), - mock_chat_completion_stream(token="discarded tokens"), - ] - - mock_post.return_value = response - replies, metadata = complete_stream( - url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback - ) - mock_post.assert_called_once_with( - "test-url", - headers={"header": "test-header"}, - data=json.dumps({"param": "test-param"}), - timeout=OPENAI_TIMEOUT, - stream=True, - ) - assert replies == ["|Hello||,|| how|| are|| you||?|"] - assert metadata == [{"model": "test-model", "index": 0, "finish_reason": "stop"}] - - -@pytest.mark.unit -def test_query_chat_model_stream_fail(): - with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: - callback = Mock() - response = Mock() - response.status_code = 500 - mock_post.return_value = response - with pytest.raises(OpenAIError): - complete_stream( - url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback - ) - mock_post.assert_called_with( 
- "test-url", - headers={"header": "test-header"}, - data=json.dumps({"param": "test-param"}), - timeout=OPENAI_TIMEOUT, - ) - mock_post.call_count == OPENAI_MAX_RETRIES - - -@pytest.mark.unit -def test_enforce_token_limit_above_limit(caplog, mock_tokenizer): - prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=3) - assert prompt == "This is a" - assert caplog.records[0].message == ( - "The prompt has been truncated from 5 tokens to 3 tokens to fit within the max token " - "limit. Reduce the length of the prompt to prevent it from being cut off." - ) - - -@pytest.mark.unit -def test_enforce_token_limit_below_limit(caplog, mock_tokenizer): - prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=100) - assert prompt == "This is a test prompt." - assert not caplog.records - - -@pytest.mark.unit -def test_enforce_token_limit_chat_above_limit(caplog, mock_tokenizer): - prompts = enforce_token_limit_chat( - [ - ChatMessage(content="System Prompt", role="system"), - ChatMessage(content="This is a test prompt.", role="user"), - ], - tokenizer=mock_tokenizer, - max_tokens_limit=7, - tokens_per_message_overhead=2, - ) - assert prompts == [ - ChatMessage(content="System Prompt", role="system"), - ChatMessage(content="This is a", role="user"), - ] - assert caplog.records[0].message == ( - "The chat have been truncated from 11 tokens to 7 tokens to fit within the max token limit. " - "Reduce the length of the chat to prevent it from being cut off." - ) - - -@pytest.mark.unit -def test_enforce_token_limit_chat_below_limit(caplog, mock_tokenizer): - prompts = enforce_token_limit_chat( - [ - ChatMessage(content="System Prompt", role="system"), - ChatMessage(content="This is a test prompt.", role="user"), - ], - tokenizer=mock_tokenizer, - max_tokens_limit=100, - tokens_per_message_overhead=2, - ) - assert prompts == [ - ChatMessage(content="System Prompt", role="system"), - ChatMessage(content="This is a test prompt.", role="user"), - ] - assert not caplog.records From 0a254146db3bc9b995939f2ad715d0106a2d4783 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 18:53:49 +0200 Subject: [PATCH 07/37] use openai sdk --- .../components/test_chatgpt_generator.py | 9 +- .../components/generators/openai/chatgpt.py | 119 +++++++++++++----- 2 files changed, 94 insertions(+), 34 deletions(-) diff --git a/e2e/preview/components/test_chatgpt_generator.py b/e2e/preview/components/test_chatgpt_generator.py index c3fad4038d..2a4e09c45e 100644 --- a/e2e/preview/components/test_chatgpt_generator.py +++ b/e2e/preview/components/test_chatgpt_generator.py @@ -37,9 +37,9 @@ class Callback: def __init__(self): self.responses = "" - def __call__(self, token, event_data): - self.responses += token - return token + def __call__(self, chunk): + self.responses += chunk.choices[0].delta.content if chunk.choices[0].delta else "" + return chunk callback = Callback() component = ChatGPTGenerator(os.environ.get("OPENAI_API_KEY"), streaming_callback=callback) @@ -57,6 +57,9 @@ def __call__(self, token, event_data): assert len(results["metadata"]) == 2 assert len(results["metadata"][0]) == 1 + + print(results["metadata"][0][0]) + assert "gpt-3.5-turbo" in results["metadata"][0][0]["model"] assert "stop" == results["metadata"][0][0]["finish_reason"] assert len(results["metadata"][1]) == 1 diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 
fcb9047d75..8f70b448ea 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -3,9 +3,13 @@ import sys import builtins import logging +from dataclasses import asdict + +import openai from haystack.preview import component, default_from_dict, default_to_dict, DeserializationError -from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend + +# from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend from haystack.preview.llm_backends.chat_message import ChatMessage @@ -15,13 +19,14 @@ TOKENS_PER_MESSAGE_OVERHEAD = 4 -def default_streaming_callback(token: str, **kwargs): +def default_streaming_callback(chunk: Dict[str, Any]) -> Dict[str, Any]: """ Default callback function for streaming responses from OpenAI API. - Prints the tokens to stdout as soon as they are received and returns them. + Prints the tokens to stdout as soon as they are received and returns the chunk unchanged. """ - print(token, flush=True, end="") - return token + if chunk.choices.delta.content: + print(chunk.choices.delta.content, flush=True, end="") + return chunk @component @@ -75,38 +80,36 @@ def __init__( values are the bias to add to that token. - `openai_organization`: The OpenAI organization ID. """ - self.llm = ChatGPTBackend( - api_key=api_key, - model_name=model_name, - model_parameters=model_parameters, - streaming_callback=streaming_callback, - api_base_url=api_base_url, - ) + self.api_key = api_key + self.model_name = model_name self.system_prompt = system_prompt + self.model_parameters = model_parameters + self.streaming_callback = streaming_callback + self.api_base_url = api_base_url def to_dict(self) -> Dict[str, Any]: """ Serialize this component to a dictionary. """ - if self.llm.streaming_callback: - module = sys.modules.get(self.llm.streaming_callback.__module__) + if self.streaming_callback: + module = sys.modules.get(self.streaming_callback.__module__) if not module: raise ValueError("Could not locate the import module.") if module == builtins: - callback_name = self.llm.streaming_callback.__name__ + callback_name = self.streaming_callback.__name__ else: - callback_name = f"{module.__name__}.{self.llm.streaming_callback.__name__}" + callback_name = f"{module.__name__}.{self.streaming_callback.__name__}" else: callback_name = None return default_to_dict( self, - api_key=self.llm.api_key, - model_name=self.llm.model_name, - model_parameters=self.llm.model_parameters, + api_key=self.api_key, + model_name=self.model_name, + model_parameters=self.model_parameters, system_prompt=self.system_prompt, streaming_callback=callback_name, - api_base_url=self.llm.api_base_url, + api_base_url=self.api_base_url, ) @classmethod @@ -174,7 +177,13 @@ def run( See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. 
""" + api_key = api_key if api_key is not None else self.api_key + model_name = model_name if model_name is not None else self.model_name system_prompt = system_prompt if system_prompt is not None else self.system_prompt + model_parameters = model_parameters if model_parameters is not None else self.model_parameters + streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback + api_base_url = api_base_url if api_base_url is not None else self.api_base_url + if system_prompt: system_message = ChatMessage(content=system_prompt, role="system") chats = [] @@ -185,17 +194,65 @@ def run( else: chats.append([message]) - replies, metadata = [], [] + all_replies, all_metadata = [], [] for chat in chats: - reply, meta = self.llm.complete( - chat=chat, - api_key=api_key, - model_name=model_name, - model_parameters=model_parameters, - streaming_callback=streaming_callback, - api_base_url=api_base_url, + completion = openai.ChatCompletion.create( + model=self.model_name, + api_key=self.api_key, + messages=[asdict(message) for message in chat], + stream=streaming_callback is not None, + **(self.model_parameters or model_parameters or {}), ) - replies.append(reply) - metadata.append(meta) + if streaming_callback: + replies = {} + metadata = {} + for chunk in completion: + chunk = streaming_callback(chunk) + for choice in chunk.choices: + if choice.index not in replies: + replies[choice.index] = "" + metadata[choice.index] = {} + + if hasattr(choice.delta, "content"): + replies[choice.index] += choice.delta.content + metadata[choice.index].update( + {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason} + ) + all_replies.append(list(replies.values())) + all_metadata.append(list(metadata.values())) + check_truncated_answers(list(metadata.values())) + + else: + metadata = [ + { + "model": completion.model, + "index": choice.index, + "finish_reason": choice.finish_reason, + **completion.usage.__dict__, + } + for choice in completion.choices + ] + replies = [choice.message.content.strip() for choice in completion.choices] + all_replies.append(replies) + all_metadata.append(metadata) + check_truncated_answers(metadata) - return {"replies": replies, "metadata": metadata} + return {"replies": all_replies, "metadata": all_metadata} + + +def check_truncated_answers(metadata: List[List[Dict[str, Any]]]): + """ + Check the `finish_reason` the answers returned by OpenAI completions endpoint. + If the `finish_reason` is `length`, log a warning to the user. + + :param result: The result returned from the OpenAI API. + :param payload: The payload sent to the OpenAI API. + """ + truncated_completions = sum(1 for meta in metadata if meta.get("finish_reason") != "stop") + if truncated_completions > 0: + logger.warning( + "%s out of the %s completions have been truncated before reaching a natural stopping point. 
" + "Increase the max_tokens parameter to allow for longer completions.", + truncated_completions, + len(metadata), + ) From 5105ae8ae10da3f99f497c3a33e1defa51cee0e7 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 18:58:11 +0200 Subject: [PATCH 08/37] remove backend --- haystack/preview/llm_backends/__init__.py | 0 haystack/preview/llm_backends/chat_message.py | 7 - .../preview/llm_backends/openai/__init__.py | 0 .../preview/llm_backends/openai/_helpers.py | 222 --------------- .../preview/llm_backends/openai/chatgpt.py | 186 ------------- .../preview/llm_backends/openai/errors.py | 35 --- .../openai/test_chatgpt_generator.py | 64 +++-- .../llm_backends/test_chatgpt_backend.py | 186 ------------- .../llm_backends/test_openai_helpers.py | 252 ------------------ 9 files changed, 36 insertions(+), 916 deletions(-) delete mode 100644 haystack/preview/llm_backends/__init__.py delete mode 100644 haystack/preview/llm_backends/chat_message.py delete mode 100644 haystack/preview/llm_backends/openai/__init__.py delete mode 100644 haystack/preview/llm_backends/openai/_helpers.py delete mode 100644 haystack/preview/llm_backends/openai/chatgpt.py delete mode 100644 haystack/preview/llm_backends/openai/errors.py delete mode 100644 test/preview/llm_backends/test_chatgpt_backend.py delete mode 100644 test/preview/llm_backends/test_openai_helpers.py diff --git a/haystack/preview/llm_backends/__init__.py b/haystack/preview/llm_backends/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/haystack/preview/llm_backends/chat_message.py b/haystack/preview/llm_backends/chat_message.py deleted file mode 100644 index ca20f905f3..0000000000 --- a/haystack/preview/llm_backends/chat_message.py +++ /dev/null @@ -1,7 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class ChatMessage: - content: str - role: str diff --git a/haystack/preview/llm_backends/openai/__init__.py b/haystack/preview/llm_backends/openai/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/haystack/preview/llm_backends/openai/_helpers.py b/haystack/preview/llm_backends/openai/_helpers.py deleted file mode 100644 index f87611d4fc..0000000000 --- a/haystack/preview/llm_backends/openai/_helpers.py +++ /dev/null @@ -1,222 +0,0 @@ -from typing import List, Callable, Dict, Any, Tuple -import os -import logging -import json - -import tenacity -import requests -import sseclient - -from haystack.preview.lazy_imports import LazyImport -from haystack.preview.llm_backends.chat_message import ChatMessage -from haystack.preview.llm_backends.openai.errors import OpenAIError, OpenAIRateLimitError, OpenAIUnauthorizedError - -with LazyImport("Run 'pip install tiktoken'") as tiktoken_import: - import tiktoken - - -logger = logging.getLogger(__name__) - - -OPENAI_TIMEOUT = float(os.environ.get("HAYSTACK_REMOTE_API_TIMEOUT_SEC", 30)) -OPENAI_BACKOFF = int(os.environ.get("HAYSTACK_REMOTE_API_BACKOFF_SEC", 10)) -OPENAI_MAX_RETRIES = int(os.environ.get("HAYSTACK_REMOTE_API_MAX_RETRIES", 5)) -OPENAI_TOKENIZERS = { - **tiktoken.model.MODEL_TO_ENCODING, - "gpt-35-turbo": "cl100k_base", # https://github.com/openai/tiktoken/pull/72 -} -OPENAI_TOKENIZERS_TOKEN_LIMITS = { - "text-davinci": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3 - "gpt-35-turbo": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3-5 - "gpt-3.5-turbo": 4097, # Ref: https://platform.openai.com/docs/models/gpt-3-5 - "gpt-3.5-turbo-16k": 16384, # Ref: https://platform.openai.com/docs/models/gpt-3-5 - "gpt-3": 4096, 
# Ref: https://platform.openai.com/docs/models/gpt-3 - "gpt-4-32k": 32768, # Ref: https://platform.openai.com/docs/models/gpt-4 - "gpt-4": 8192, # Ref: https://platform.openai.com/docs/models/gpt-4 -} -OPENAI_STREAMING_DONE_MARKER = "[DONE]" # Ref: https://platform.openai.com/docs/api-reference/chat/create#stream - - -#: Retry on OpenAI errors -openai_retry = tenacity.retry( - reraise=True, - retry=tenacity.retry_if_exception_type(OpenAIError) - and tenacity.retry_if_not_exception_type(OpenAIUnauthorizedError), - wait=tenacity.wait_exponential(multiplier=OPENAI_BACKOFF), - stop=tenacity.stop_after_attempt(OPENAI_MAX_RETRIES), -) - - -@openai_retry -def complete(url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Tuple[List[str], List[Dict[str, Any]]]: - """ - Query ChatGPT without streaming the response. - - :param url: The URL to query. - :param headers: The headers to send with the request. - :param payload: The payload to send with the request. - :return: A list of strings containing the response from the OpenAI API. - """ - response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT) - raise_for_status(response=response) - json_response = json.loads(response.text) - check_truncated_answers(result=json_response, payload=payload) - metadata = [ - { - "model": json_response.get("model", None), - "index": choice.get("index", None), - "finish_reason": choice.get("finish_reason", None), - **json_response.get("usage", {}), - } - for choice in json_response.get("choices", []) - ] - replies = [choice["message"]["content"].strip() for choice in json_response.get("choices", [])] - return replies, metadata - - -@openai_retry -def complete_stream( - url: str, headers: Dict[str, str], payload: Dict[str, Any], callback: Callable -) -> Tuple[List[str], List[Dict[str, Any]]]: - """ - Query ChatGPT and streams the response. Once the stream finishes, returns a list of strings just like - self._query_llm() - - :param url: The URL to query. - :param headers: The headers to send with the request. - :param payload: The payload to send with the request. - :param callback: A callback function that is called when a new token is received from the stream. - The callback function should accept two parameters: the token received from the stream and **kwargs. - The callback function should return the token that will be returned at the end of the streaming. - :return: A list of strings containing the response from the OpenAI API. - """ - response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=OPENAI_TIMEOUT, stream=True) - raise_for_status(response=response) - - client = sseclient.SSEClient(response) # type: ignore - event_data = None - tokens = [] - try: - for event in client.events(): - if event.data == OPENAI_STREAMING_DONE_MARKER: - break - event_data = json.loads(event.data) - delta = event_data["choices"][0]["delta"] - token = delta["content"] if "content" in delta else None - if token: - tokens.append(callback(token, event_data=event_data["choices"])) - finally: - client.close() - metadata = ( - [ - { - "model": event_data.get("model", None), - "index": choice.get("index", None), - "finish_reason": choice.get("finish_reason", None), - } - for choice in event_data.get("choices", []) - ] - if event_data - else [] - ) - return ["".join(tokens)], metadata - - -def raise_for_status(response: requests.Response): - """ - Raises the appropriate OpenAI error in case of a bad response. - - :param response: The response returned from the OpenAI API. 
- :raises OpenAIError: If the response status code is not 200. - """ - if response.status_code >= 400: - if response.status_code == 429: - raise OpenAIRateLimitError(f"API rate limit exceeded: {response.text}") - if response.status_code == 401: - raise OpenAIUnauthorizedError(f"API key is invalid: {response.text}") - raise OpenAIError( - f"OpenAI returned an error.\n" f"Status code: {response.status_code}\n" f"Response body: {response.text}", - status_code=response.status_code, - ) - - -def check_truncated_answers(result: Dict[str, Any], payload: Dict[str, Any]): - """ - Check the `finish_reason` the answers returned by OpenAI completions endpoint. - If the `finish_reason` is `length`, log a warning to the user. - - :param result: The result returned from the OpenAI API. - :param payload: The payload sent to the OpenAI API. - """ - truncated_completions = sum(1 for ans in result["choices"] if ans["finish_reason"] == "length") - if truncated_completions > 0: - logger.warning( - "%s out of the %s completions have been truncated before reaching a natural stopping point. " - "Increase the max_tokens parameter to allow for longer completions.", - truncated_completions, - payload["n"], - ) - - -def enforce_token_limit(prompt: str, tokenizer: "tiktoken.Encoding", max_tokens_limit: int) -> str: - """ - Ensure that the length of the prompt is within the max tokens limit of the model. - If needed, truncate the prompt text so that it fits within the limit. - - :param prompt: Prompt text to be sent to the generative model. - :param tokenizer: The tokenizer used to encode the prompt. - :param max_tokens_limit: The max tokens limit of the model. - :return: The prompt text that fits within the max tokens limit of the model. - """ - tiktoken_import.check() - tokens = tokenizer.encode(prompt) - tokens_count = len(tokens) - if tokens_count > max_tokens_limit: - logger.warning( - "The prompt has been truncated from %s tokens to %s tokens to fit within the max token limit. " - "Reduce the length of the prompt to prevent it from being cut off.", - tokens_count, - max_tokens_limit, - ) - prompt = tokenizer.decode(tokens[:max_tokens_limit]) - return prompt - - -def enforce_token_limit_chat( - chat: List[ChatMessage], tokenizer: "tiktoken.Encoding", max_tokens_limit: int, tokens_per_message_overhead: int -) -> List[ChatMessage]: - """ - Ensure that the length of the chat is within the max tokens limit of the model. - If needed, truncate the messages so that the chat fits within the limit. - - :param chat: The chat messages to be sent to the generative model. - :param tokenizer: The tokenizer used to encode the chat. - :param max_tokens_limit: The max tokens limit of the model. - :param tokens_per_message_overhead: The number of tokens that are added to the prompt text for each message. - :return: A chat that fits within the max tokens limit of the model. - """ - messages_len = [len(tokenizer.encode(message.content)) + tokens_per_message_overhead for message in chat] - if (total_chat_length := sum(messages_len)) <= max_tokens_limit: - return chat - - logger.warning( - "The chat have been truncated from %s tokens to %s tokens to fit within the max token limit. 
" - "Reduce the length of the chat to prevent it from being cut off.", - total_chat_length, - max_tokens_limit, - ) - cut_messages = [] - cut_messages_len: List[int] = [] - for message, message_len in zip(chat, messages_len): - if sum(cut_messages_len) + message_len <= max_tokens_limit: - cut_messages.append(message) - cut_messages_len.append(message_len) - else: - remaining_tokens = max_tokens_limit - sum(cut_messages_len) - cut_messages.append( - ChatMessage( - content=enforce_token_limit(message.content, tokenizer, remaining_tokens), role=message.role - ) - ) - break - return cut_messages diff --git a/haystack/preview/llm_backends/openai/chatgpt.py b/haystack/preview/llm_backends/openai/chatgpt.py deleted file mode 100644 index 6b00f090f0..0000000000 --- a/haystack/preview/llm_backends/openai/chatgpt.py +++ /dev/null @@ -1,186 +0,0 @@ -from typing import Optional, List, Callable, Dict, Any - -import logging -from dataclasses import asdict - -from haystack.preview.lazy_imports import LazyImport -from haystack.preview.llm_backends.chat_message import ChatMessage -from haystack.preview.llm_backends.openai._helpers import ( - complete, - complete_stream, - enforce_token_limit_chat, - OPENAI_TOKENIZERS, - OPENAI_TOKENIZERS_TOKEN_LIMITS, -) - - -with LazyImport() as tiktoken_import: - import tiktoken - - -logger = logging.getLogger(__name__) - - -TOKENS_PER_MESSAGE_OVERHEAD = 4 -DEFAULT_OPENAI_PARAMS = { - "max_tokens": 500, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stop": [], - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": {}, - "stream": False, - "openai_organization": None, -} - - -class ChatGPTBackend: - """ - ChatGPT LLM interface. - - Queries ChatGPT using OpenAI's GPT-3 ChatGPT API. Invocations are made using REST API. - See [OpenAI ChatGPT API](https://platform.openai.com/docs/guides/chat) for more details. - """ - - # TODO support function calling! - - def __init__( - self, - api_key: Optional[str] = None, - model_name: str = "gpt-3.5-turbo", - model_parameters: Optional[Dict[str, Any]] = None, - streaming_callback: Optional[Callable] = None, - api_base_url: str = "https://api.openai.com/v1", - ): - """ - Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. - - :param api_key: The OpenAI API key. - :param model_name: The name of the model to use. - :param streaming_callback: A callback function that is called when a new token is received from the stream. - The callback function should accept two parameters: the token received from the stream and **kwargs. - The callback function should return the token to be sent to the stream. If the callback function is not - provided, the token is printed to stdout. - :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. - :param model_parameters: A dictionary of parameters to use for the model. See OpenAI - [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported - parameters: - - `max_tokens`: The maximum number of tokens the output text can have. - - `temperature`: What sampling temperature to use. Higher values means the model will take more risks. - Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model - considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens - comprising the top 10% probability mass are considered. 
- - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2, - it will generate two completions for each of the three prompts, ending up with 6 completions in total. - - `stop`: One or more sequences after which the LLM should stop generating tokens. - - `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean - the model will be less likely to repeat the same token in the text. - - `frequency_penalty`: What penalty to apply if a token has already been generated in the text. - Bigger values mean the model will be less likely to repeat the same token in the text. - - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the - values are the bias to add to that token. - - `openai_organization`: The OpenAI organization ID. - - """ - if not api_key: - logger.warning("OpenAI API key is missing. You will need to provide an API key to Pipeline.run().") - - self.api_key = api_key - self.model_name = model_name - self.model_parameters = DEFAULT_OPENAI_PARAMS | (model_parameters or {}) - self.streaming_callback = streaming_callback - self.api_base_url = api_base_url - - tokenizer = None - for model_prefix, tokenizer_name in OPENAI_TOKENIZERS.items(): - if model_name.startswith(model_prefix): - tokenizer = tiktoken.get_encoding(tokenizer_name) - break - if not tokenizer: - raise ValueError(f"Tokenizer for model '{model_name}' not found.") - self.tokenizer = tokenizer - - max_tokens_limit = None - for model_prefix, limit in OPENAI_TOKENIZERS_TOKEN_LIMITS.items(): - if model_name.startswith(model_prefix): - max_tokens_limit = limit - break - if not max_tokens_limit: - raise ValueError(f"Max tokens limit for model '{model_name}' not found.") - self.max_tokens_limit = max_tokens_limit - - def complete( - self, - chat: List[ChatMessage], - api_key: Optional[str] = None, - model_name: Optional[str] = None, - model_parameters: Optional[Dict[str, Any]] = None, - streaming_callback: Optional[Callable] = None, - api_base_url: Optional[str] = None, - ): - """ - Queries the LLM with the prompts to produce replies. - - :param chat: The chat to be sent to the generative model. - :param api_key: The OpenAI API key. - :param model_name: The name of the model to use. - :param streaming_callback: A callback function that is called when a new token is received from the stream. - The callback function should accept two parameters: the token received from the stream and **kwargs. - The callback function should return the token to be sent to the stream. If the callback function is not - provided, the token is printed to stdout. - :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. - :param model_parameters: A dictionary of parameters to use for the model. See OpenAI - [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported - parameters: - - `max_tokens`: The maximum number of tokens the output text can have. - - `temperature`: What sampling temperature to use. Higher values means the model will take more risks. - Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model - considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens - comprising the top 10% probability mass are considered. 
- - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2, - it will generate two completions for each of the three prompts, ending up with 6 completions in total. - - `stop`: One or more sequences after which the LLM should stop generating tokens. - - `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean - the model will be less likely to repeat the same token in the text. - - `frequency_penalty`: What penalty to apply if a token has already been generated in the text. - Bigger values mean the model will be less likely to repeat the same token in the text. - - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the - values are the bias to add to that token. - - `openai_organization`: The OpenAI organization ID. - - """ - api_key = api_key if api_key is not None else self.api_key - - if not api_key: - raise ValueError("OpenAI API key is missing. Please provide an API key.") - - model_name = model_name or self.model_name - model_parameters = self.model_parameters | (model_parameters or {}) - streaming_callback = streaming_callback or self.streaming_callback - api_base_url = api_base_url or self.api_base_url - - openai_organization = model_parameters.pop("openai_organization", None) - headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} - if openai_organization: - headers["OpenAI-Organization"] = openai_organization - url = f"{api_base_url}/chat/completions" - - chat = enforce_token_limit_chat( - chat=chat, - tokenizer=self.tokenizer, - max_tokens_limit=self.max_tokens_limit, - tokens_per_message_overhead=TOKENS_PER_MESSAGE_OVERHEAD, - ) - payload = { - "model": model_name, - **model_parameters, - "stream": streaming_callback is not None, - "messages": [asdict(message) for message in chat], - } - if streaming_callback: - return complete_stream(url=url, headers=headers, payload=payload, callback=streaming_callback) - return complete(url=url, headers=headers, payload=payload) diff --git a/haystack/preview/llm_backends/openai/errors.py b/haystack/preview/llm_backends/openai/errors.py deleted file mode 100644 index 1787b4e17a..0000000000 --- a/haystack/preview/llm_backends/openai/errors.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional -from haystack.preview import ComponentError - - -class OpenAIError(ComponentError): - """Exception for issues that occur in the OpenAI APIs""" - - def __init__(self, message: Optional[str] = None, status_code: Optional[int] = None): - super().__init__() - self.message = message - self.status_code = status_code - - def __str__(self): - return self.message + f"(status code {self.status_code})" if self.status_code else "" - - -class OpenAIRateLimitError(OpenAIError): - """ - Rate limit error for OpenAI API (status code 429) - See https://help.openai.com/en/articles/5955604-how-can-i-solve-429-too-many-requests-errors - See https://help.openai.com/en/articles/5955598-is-api-usage-subject-to-any-rate-limits - """ - - def __init__(self, message: Optional[str] = None): - super().__init__(message=message, status_code=429) - - -class OpenAIUnauthorizedError(OpenAIError): - """ - Unauthorized error for OpenAI API (status code 401) - See https://platform.openai.com/docs/guides/error-codes/api-errors - """ - - def __init__(self, message: Optional[str] = None): - super().__init__(message=message, status_code=401) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py 
b/test/preview/components/generators/openai/test_chatgpt_generator.py index 944dd84d5e..e99fedba94 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -3,8 +3,7 @@ import pytest from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator -from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback -from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend, DEFAULT_OPENAI_PARAMS +from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback, check_truncated_answers class TestChatGPTGenerator: @@ -13,12 +12,11 @@ def test_init_default(self, caplog): with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: component = ChatGPTGenerator() assert component.system_prompt is None - assert component.llm.api_key is None - assert component.llm.model_name == "gpt-3.5-turbo" - assert component.llm.streaming_callback is None - assert component.llm.api_base_url == "https://api.openai.com/v1" - assert component.llm.model_parameters == DEFAULT_OPENAI_PARAMS - assert isinstance(component.llm, ChatGPTBackend) + assert component.api_key is None + assert component.model_name == "gpt-3.5-turbo" + assert component.streaming_callback is None + assert component.api_base_url == "https://api.openai.com/v1" + assert component.model_parameters is None @pytest.mark.unit def test_init_with_parameters(self, caplog): @@ -33,15 +31,11 @@ def test_init_with_parameters(self, caplog): api_base_url="test-base-url", ) assert component.system_prompt == "test-system-prompt" - assert component.llm.api_key == "test-api-key" - assert component.llm.model_name == "gpt-4" - assert component.llm.streaming_callback == callback - assert component.llm.api_base_url == "test-base-url" - assert component.llm.model_parameters == { - **DEFAULT_OPENAI_PARAMS, - "max_tokens": 10, - "some-test-param": "test-params", - } + assert component.api_key == "test-api-key" + assert component.model_name == "gpt-4" + assert component.streaming_callback == callback + assert component.api_base_url == "test-base-url" + assert component.model_parameters == {"max_tokens": 10, "some-test-param": "test-params"} @pytest.mark.unit def test_to_dict_default(self): @@ -54,7 +48,7 @@ def test_to_dict_default(self): "api_key": None, "model_name": "gpt-3.5-turbo", "system_prompt": None, - "model_parameters": DEFAULT_OPENAI_PARAMS, + "model_parameters": None, "streaming_callback": None, "api_base_url": "https://api.openai.com/v1", }, @@ -78,7 +72,7 @@ def test_to_dict_with_parameters(self): "api_key": "test-api-key", "model_name": "gpt-4", "system_prompt": "test-system-prompt", - "model_parameters": {**DEFAULT_OPENAI_PARAMS, "max_tokens": 10, "some-test-params": "test-params"}, + "model_parameters": {"max_tokens": 10, "some-test-params": "test-params"}, "api_base_url": "test-base-url", "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", }, @@ -100,15 +94,11 @@ def test_from_dict(self): } component = ChatGPTGenerator.from_dict(data) assert component.system_prompt == "test-system-prompt" - assert component.llm.api_key == "test-api-key" - assert component.llm.model_name == "gpt-4" - assert component.llm.streaming_callback == default_streaming_callback - assert component.llm.api_base_url == "test-base-url" - assert component.llm.model_parameters == { - **DEFAULT_OPENAI_PARAMS, - "max_tokens": 
10, - "some-test-params": "test-params", - } + assert component.api_key == "test-api-key" + assert component.model_name == "gpt-4" + assert component.streaming_callback == default_streaming_callback + assert component.api_base_url == "test-base-url" + assert component.model_parameters == {"max_tokens": 10, "some-test-params": "test-params"} @pytest.mark.unit def test_run_no_api_key(self): @@ -147,3 +137,21 @@ def test_run_with_system_prompt(self): ], "metadata": [{"some_info": None}, {"some_info": None}], } + + +@pytest.mark.unit +def test_check_truncated_answers(caplog): + result = { + "choices": [ + {"finish_reason": "length"}, + {"finish_reason": "content_filter"}, + {"finish_reason": "length"}, + {"finish_reason": "stop"}, + ] + } + payload = {"n": 4} + check_truncated_answers(result, payload) + assert caplog.records[0].message == ( + "2 out of the 4 completions have been truncated before reaching a natural " + "stopping point. Increase the max_tokens parameter to allow for longer completions." + ) diff --git a/test/preview/llm_backends/test_chatgpt_backend.py b/test/preview/llm_backends/test_chatgpt_backend.py deleted file mode 100644 index b00f287ef3..0000000000 --- a/test/preview/llm_backends/test_chatgpt_backend.py +++ /dev/null @@ -1,186 +0,0 @@ -from unittest.mock import patch, Mock - -import pytest - -from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend, ChatMessage - - -class TestChatGPTBackend: - @pytest.mark.unit - def test_init_default(self, caplog): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTBackend() - assert component.api_key is None - assert component.model_name == "gpt-3.5-turbo" - assert component.model_parameters == { - "max_tokens": 500, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stop": [], - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": {}, - "stream": False, - "openai_organization": None, - } - assert component.streaming_callback is None - assert component.api_base_url == "https://api.openai.com/v1" - assert component.max_tokens_limit == 4097 - - tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") - assert caplog.records[0].message == ( - "OpenAI API key is missing. You will need to provide an API key to Pipeline.run()." 
- ) - - @pytest.mark.unit - def test_init_with_parameters(self, caplog): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - callback = lambda x: x - component = ChatGPTBackend( - api_key="test-api-key", - model_name="gpt-4", - model_parameters={"max_tokens": 100, "extra-param": "value"}, - streaming_callback=callback, - api_base_url="test-base-url", - ) - assert component.api_key == "test-api-key" - assert component.model_name == "gpt-4" - assert component.model_parameters == { - "max_tokens": 100, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stop": [], - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": {}, - "stream": False, - "openai_organization": None, - "extra-param": "value", - } - assert component.streaming_callback == callback - assert component.api_base_url == "test-base-url" - assert component.max_tokens_limit == 8192 - - tiktoken_patch.get_encoding.assert_called_once_with("cl100k_base") - assert not caplog.records - - @pytest.mark.unit - def test_init_unknown_tokenizer(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - with pytest.raises(ValueError, match="Tokenizer for model 'test-another-model-name' not found."): - ChatGPTBackend(model_name="test-another-model-name") - - @pytest.mark.unit - def test_init_unknown_token_limit(self, monkeypatch): - monkeypatch.setattr( - "haystack.preview.llm_backends.openai.chatgpt.OPENAI_TOKENIZERS", {"test-model-name": "test-encoding"} - ) - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - with pytest.raises(ValueError, match="Max tokens limit for model 'test-model-name' not found."): - ChatGPTBackend(model_name="test-model-name") - - @pytest.mark.unit - def test_run_no_api_key(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTBackend() - with pytest.raises(ValueError, match="OpenAI API key is missing. 
Please provide an API key."): - component.complete(chat=[]) - - @pytest.mark.unit - def test_complete(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - with patch("haystack.preview.llm_backends.openai.chatgpt.complete") as complete_patch: - complete_patch.side_effect = lambda payload, **kwargs: ( - [ - f"Response for {payload['messages'][1]['content']}", - f"Another Response for {payload['messages'][1]['content']}", - ], - [{"metadata of": payload["messages"][1]["content"]}], - ) - component = ChatGPTBackend( - api_key="test-api-key", - model_parameters={"openai_organization": "test_orga_id"}, - api_base_url="test-base-url", - ) - - results = component.complete( - chat=[ - ChatMessage(content="test-prompt-system", role="system"), - ChatMessage(content="test-prompt-user", role="user"), - ] - ) - - assert results == ( - [f"Response for test-prompt-user", f"Another Response for test-prompt-user"], - [{"metadata of": "test-prompt-user"}], - ) - - complete_patch.call_count == 2 - complete_patch.assert_called_once_with( - url="test-base-url/chat/completions", - headers={ - "Authorization": f"Bearer test-api-key", - "Content-Type": "application/json", - "OpenAI-Organization": "test_orga_id", - }, - payload={ - "model": "gpt-3.5-turbo", - "max_tokens": 500, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stream": False, - "stop": [], - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": {}, - "messages": [ - {"role": "system", "content": "test-prompt-system"}, - {"role": "user", "content": "test-prompt-user"}, - ], - }, - ) - - @pytest.mark.unit - def test_complete_streaming(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - with patch("haystack.preview.llm_backends.openai.chatgpt.complete_stream") as complete_stream_patch: - complete_stream_patch.side_effect = lambda payload, **kwargs: ( - [f"Response for {payload['messages'][1]['content']}"], - [{"metadata of": payload["messages"][1]["content"]}], - ) - callback = Mock() - component = ChatGPTBackend(api_key="test-api-key", streaming_callback=callback) - - results = component.complete( - chat=[ - ChatMessage(content="test-prompt-system", role="system"), - ChatMessage(content="test-prompt-user", role="user"), - ] - ) - - assert results == (["Response for test-prompt-user"], [{"metadata of": "test-prompt-user"}]) - complete_stream_patch.call_count == 2 - complete_stream_patch.assert_any_call( - url="https://api.openai.com/v1/chat/completions", - headers={"Authorization": f"Bearer test-api-key", "Content-Type": "application/json"}, - payload={ - "model": "gpt-3.5-turbo", - "max_tokens": 500, - "temperature": 0.7, - "top_p": 1, - "n": 1, - "stream": True, - "stop": [], - "presence_penalty": 0, - "frequency_penalty": 0, - "logit_bias": {}, - "messages": [ - {"role": "system", "content": "test-prompt-system"}, - {"role": "user", "content": "test-prompt-user"}, - ], - }, - callback=callback, - ) diff --git a/test/preview/llm_backends/test_openai_helpers.py b/test/preview/llm_backends/test_openai_helpers.py deleted file mode 100644 index 736d7f3dd5..0000000000 --- a/test/preview/llm_backends/test_openai_helpers.py +++ /dev/null @@ -1,252 +0,0 @@ -from unittest.mock import Mock, patch -import json - -import pytest - -from haystack.preview.llm_backends.openai.errors import OpenAIUnauthorizedError, OpenAIError, OpenAIRateLimitError -from haystack.preview.llm_backends.openai._helpers import ( - ChatMessage, - raise_for_status, - 
check_truncated_answers, - complete, - complete_stream, - enforce_token_limit, - enforce_token_limit_chat, - OPENAI_TIMEOUT, - OPENAI_MAX_RETRIES, -) - - -@pytest.mark.unit -def test_raise_for_status_200(): - response = Mock() - response.status_code = 200 - raise_for_status(response) - - -@pytest.mark.unit -def test_raise_for_status_401(): - response = Mock() - response.status_code = 401 - with pytest.raises(OpenAIUnauthorizedError): - raise_for_status(response) - - -@pytest.mark.unit -def test_raise_for_status_429(): - response = Mock() - response.status_code = 429 - with pytest.raises(OpenAIRateLimitError): - raise_for_status(response) - - -@pytest.mark.unit -def test_raise_for_status_500(): - response = Mock() - response.status_code = 500 - response.text = "Internal Server Error" - with pytest.raises(OpenAIError): - raise_for_status(response) - - -@pytest.mark.unit -def test_check_truncated_answers(caplog): - result = { - "choices": [ - {"finish_reason": "length"}, - {"finish_reason": "content_filter"}, - {"finish_reason": "length"}, - {"finish_reason": "stop"}, - ] - } - payload = {"n": 4} - check_truncated_answers(result, payload) - assert caplog.records[0].message == ( - "2 out of the 4 completions have been truncated before reaching a natural " - "stopping point. Increase the max_tokens parameter to allow for longer completions." - ) - - -@pytest.mark.unit -def test_query_chat_model(): - with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: - response = Mock() - response.status_code = 200 - response.text = """ - { - "model": "test-model", - "choices": [ - { - "index": 0, - "finish_reason": "stop", - "message": {"content": " Hello, how are you? "} - } - ], - "usage": { - "prompt_tokens": 4, - "completion_tokens": 5, - "total_tokens": 9 - } - - }""" - mock_post.return_value = response - replies, metadata = complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) - mock_post.assert_called_once_with( - "test-url", - headers={"header": "test-header"}, - data=json.dumps({"param": "test-param"}), - timeout=OPENAI_TIMEOUT, - ) - assert replies == ["Hello, how are you?"] - assert metadata == [ - { - "model": "test-model", - "index": 0, - "finish_reason": "stop", - "prompt_tokens": 4, - "completion_tokens": 5, - "total_tokens": 9, - } - ] - - -@pytest.mark.unit -def test_query_chat_model_fail(): - with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: - response = Mock() - response.status_code = 500 - mock_post.return_value = response - with pytest.raises(OpenAIError): - complete(url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}) - mock_post.assert_called_with( - "test-url", - headers={"header": "test-header"}, - data=json.dumps({"param": "test-param"}), - timeout=OPENAI_TIMEOUT, - ) - mock_post.call_count == OPENAI_MAX_RETRIES - - -def mock_chat_completion_stream(model="test-model", index=0, token="test", finish_reason="stop"): - return Mock( - data=f"""{{ - "model": "{model}", - "choices": [ - {{ - "index": {index}, - "delta": {{"content": "{token}"}}, - "finish_reason": "{finish_reason}" - }} - ] - }}""" - ) - - -@pytest.mark.unit -def test_query_chat_model_stream(): - with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: - with patch("haystack.preview.llm_backends.openai._helpers.sseclient.SSEClient") as mock_sseclient: - callback = lambda token, event_data: f"|{token}|" - response = Mock() - response.status_code = 200 - 
- mock_sseclient.return_value.events.return_value = [ - mock_chat_completion_stream(token="Hello"), - mock_chat_completion_stream(token=","), - mock_chat_completion_stream(token=" how"), - mock_chat_completion_stream(token=" are"), - mock_chat_completion_stream(token=" you"), - mock_chat_completion_stream(token="?"), - Mock(data="[DONE]"), - mock_chat_completion_stream(token="discarded tokens"), - ] - - mock_post.return_value = response - replies, metadata = complete_stream( - url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback - ) - mock_post.assert_called_once_with( - "test-url", - headers={"header": "test-header"}, - data=json.dumps({"param": "test-param"}), - timeout=OPENAI_TIMEOUT, - stream=True, - ) - assert replies == ["|Hello||,|| how|| are|| you||?|"] - assert metadata == [{"model": "test-model", "index": 0, "finish_reason": "stop"}] - - -@pytest.mark.unit -def test_query_chat_model_stream_fail(): - with patch("haystack.preview.llm_backends.openai._helpers.requests.post") as mock_post: - callback = Mock() - response = Mock() - response.status_code = 500 - mock_post.return_value = response - with pytest.raises(OpenAIError): - complete_stream( - url="test-url", headers={"header": "test-header"}, payload={"param": "test-param"}, callback=callback - ) - mock_post.assert_called_with( - "test-url", - headers={"header": "test-header"}, - data=json.dumps({"param": "test-param"}), - timeout=OPENAI_TIMEOUT, - ) - mock_post.call_count == OPENAI_MAX_RETRIES - - -@pytest.mark.unit -def test_enforce_token_limit_above_limit(caplog, mock_tokenizer): - prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=3) - assert prompt == "This is a" - assert caplog.records[0].message == ( - "The prompt has been truncated from 5 tokens to 3 tokens to fit within the max token " - "limit. Reduce the length of the prompt to prevent it from being cut off." - ) - - -@pytest.mark.unit -def test_enforce_token_limit_below_limit(caplog, mock_tokenizer): - prompt = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=100) - assert prompt == "This is a test prompt." - assert not caplog.records - - -@pytest.mark.unit -def test_enforce_token_limit_chat_above_limit(caplog, mock_tokenizer): - prompts = enforce_token_limit_chat( - [ - ChatMessage(content="System Prompt", role="system"), - ChatMessage(content="This is a test prompt.", role="user"), - ], - tokenizer=mock_tokenizer, - max_tokens_limit=7, - tokens_per_message_overhead=2, - ) - assert prompts == [ - ChatMessage(content="System Prompt", role="system"), - ChatMessage(content="This is a", role="user"), - ] - assert caplog.records[0].message == ( - "The chat have been truncated from 11 tokens to 7 tokens to fit within the max token limit. " - "Reduce the length of the chat to prevent it from being cut off." 
- ) - - -@pytest.mark.unit -def test_enforce_token_limit_chat_below_limit(caplog, mock_tokenizer): - prompts = enforce_token_limit_chat( - [ - ChatMessage(content="System Prompt", role="system"), - ChatMessage(content="This is a test prompt.", role="user"), - ], - tokenizer=mock_tokenizer, - max_tokens_limit=100, - tokens_per_message_overhead=2, - ) - assert prompts == [ - ChatMessage(content="System Prompt", role="system"), - ChatMessage(content="This is a test prompt.", role="user"), - ] - assert not caplog.records From 7d0c8e6dbede996324f6be3b439229ad9655eac5 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 4 Sep 2023 19:01:31 +0200 Subject: [PATCH 09/37] tests are broken --- .../components/generators/openai/chatgpt.py | 2 +- haystack/preview/dataclasses/chat_message.py | 7 +++++++ .../generators/openai/test_chatgpt_generator.py | 17 +++++++---------- 3 files changed, 15 insertions(+), 11 deletions(-) create mode 100644 haystack/preview/dataclasses/chat_message.py diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 8f70b448ea..0065290a72 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -10,7 +10,7 @@ from haystack.preview import component, default_from_dict, default_to_dict, DeserializationError # from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend -from haystack.preview.llm_backends.chat_message import ChatMessage +from haystack.preview.dataclasses.chat_message import ChatMessage logger = logging.getLogger(__name__) diff --git a/haystack/preview/dataclasses/chat_message.py b/haystack/preview/dataclasses/chat_message.py new file mode 100644 index 0000000000..ca20f905f3 --- /dev/null +++ b/haystack/preview/dataclasses/chat_message.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass + + +@dataclass +class ChatMessage: + content: str + role: str diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index e99fedba94..e028f747c9 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -141,16 +141,13 @@ def test_run_with_system_prompt(self): @pytest.mark.unit def test_check_truncated_answers(caplog): - result = { - "choices": [ - {"finish_reason": "length"}, - {"finish_reason": "content_filter"}, - {"finish_reason": "length"}, - {"finish_reason": "stop"}, - ] - } - payload = {"n": 4} - check_truncated_answers(result, payload) + metadata = [ + {"finish_reason": "length"}, + {"finish_reason": "content_filter"}, + {"finish_reason": "length"}, + {"finish_reason": "stop"}, + ] + check_truncated_answers(metadata) assert caplog.records[0].message == ( "2 out of the 4 completions have been truncated before reaching a natural " "stopping point. Increase the max_tokens parameter to allow for longer completions." 
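As a point of reference for the generator rewrite carried out in the following patches, here is a minimal usage sketch of ChatGPTGenerator as the updated tests exercise it. It is not part of the patch series itself; the API key, system prompt, and prompt text below are placeholders, and the shape of the returned dictionary follows the patched run() method and its tests.

from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator

# Placeholder values for illustration only; any valid OpenAI API key and prompts would do.
generator = ChatGPTGenerator(api_key="sk-...", system_prompt="You are a helpful assistant.")
results = generator.run(prompts=["What is Haystack?"])
# results["replies"] holds one list of replies per input prompt;
# results["metadata"] holds the matching per-reply dicts (model, index, finish_reason, usage, ...).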
From de46d104220c1f9ad627be92987e4937442a18a9 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 11:44:53 +0200 Subject: [PATCH 10/37] fix tests --- .../components/generators/openai/chatgpt.py | 29 +- .../openai/test_chatgpt_generator.py | 370 +++++++++++++----- 2 files changed, 290 insertions(+), 109 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 0065290a72..75fc3a0222 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -22,10 +22,10 @@ def default_streaming_callback(chunk: Dict[str, Any]) -> Dict[str, Any]: """ Default callback function for streaming responses from OpenAI API. - Prints the tokens to stdout as soon as they are received and returns the chunk unchanged. + Prints the tokens of the first completion to stdout as soon as they are received and returns the chunk unchanged. """ - if chunk.choices.delta.content: - print(chunk.choices.delta.content, flush=True, end="") + if hasattr(chunk.choices[0].delta, "content"): + print(chunk.choices[0].delta.content, flush=True, end="") return chunk @@ -80,6 +80,9 @@ def __init__( values are the bias to add to that token. - `openai_organization`: The OpenAI organization ID. """ + if not api_key: + logger.warning("OpenAI API key is missing. You need to provide an API key to Pipeline.run().") + self.api_key = api_key self.model_name = model_name self.system_prompt = system_prompt @@ -178,11 +181,14 @@ def run( See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. """ api_key = api_key if api_key is not None else self.api_key - model_name = model_name if model_name is not None else self.model_name + if not api_key: + raise ValueError("OpenAI API key is missing. 
Please provide an API key.") + + model_name = model_name or self.model_name system_prompt = system_prompt if system_prompt is not None else self.system_prompt model_parameters = model_parameters if model_parameters is not None else self.model_parameters - streaming_callback = streaming_callback if streaming_callback is not None else self.streaming_callback - api_base_url = api_base_url if api_base_url is not None else self.api_base_url + streaming_callback = streaming_callback or self.streaming_callback + api_base_url = api_base_url or self.api_base_url if system_prompt: system_message = ChatMessage(content=system_prompt, role="system") @@ -215,9 +221,12 @@ def run( if hasattr(choice.delta, "content"): replies[choice.index] += choice.delta.content - metadata[choice.index].update( - {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason} - ) + metadata[choice.index] = { + "model": chunk.model, + "index": choice.index, + "finish_reason": choice.finish_reason, + } + all_replies.append(list(replies.values())) all_metadata.append(list(metadata.values())) check_truncated_answers(list(metadata.values())) @@ -228,7 +237,7 @@ def run( "model": completion.model, "index": choice.index, "finish_reason": choice.finish_reason, - **completion.usage.__dict__, + "usage": dict(completion.usage.items()), } for choice in completion.choices ] diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index e028f747c9..3f008c1c36 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -1,148 +1,320 @@ -from unittest.mock import patch +from unittest.mock import patch, Mock +from copy import deepcopy import pytest +import openai +from openai.util import convert_to_openai_object from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback, check_truncated_answers +def mock_openai_response( + messages: str, stream: bool, model: str = "gpt-3.5-turbo-0301", **kwargs +) -> openai.ChatCompletion: + response = f"response for these messages --> {' - '.join(msg['role']+': '+msg['content'] for msg in messages)}" + base_dict = { + "id": "chatcmpl-7NaPEA6sgX7LnNPyKPbRlsyqLbr5V", + "object": "chat.completion", + "created": 1685855844, + "model": model, + "usage": {"prompt_tokens": 57, "completion_tokens": 40, "total_tokens": 97}, + } + base_dict["choices"] = [ + {"message": {"role": "assistant", "content": response}, "finish_reason": "stop", "index": "0"} + ] + return convert_to_openai_object(deepcopy(base_dict)) + + +def mock_openai_stream_response( + messages: str, stream: bool, model: str = "gpt-3.5-turbo-0301", **kwargs +) -> openai.ChatCompletion: + response = f"response for these messages --> {' - '.join(msg['role']+': '+msg['content'] for msg in messages)}" + base_dict = { + "id": "chatcmpl-7NaPEA6sgX7LnNPyKPbRlsyqLbr5V", + "object": "chat.completion", + "created": 1685855844, + "model": model, + } + base_dict["choices"] = [{"delta": {"role": "assistant"}, "finish_reason": None, "index": "0"}] + yield convert_to_openai_object(base_dict) + for token in response.split(): + base_dict["choices"][0]["delta"] = {"content": token + " "} + yield convert_to_openai_object(base_dict) + base_dict["choices"] = [{"delta": {"content": ""}, "finish_reason": "stop", "index": "0"}] + yield 
convert_to_openai_object(base_dict) + + class TestChatGPTGenerator: @pytest.mark.unit def test_init_default(self, caplog): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTGenerator() - assert component.system_prompt is None - assert component.api_key is None - assert component.model_name == "gpt-3.5-turbo" - assert component.streaming_callback is None - assert component.api_base_url == "https://api.openai.com/v1" - assert component.model_parameters is None + component = ChatGPTGenerator() + assert component.system_prompt is None + assert component.api_key is None + assert component.model_name == "gpt-3.5-turbo" + assert component.streaming_callback is None + assert component.api_base_url == "https://api.openai.com/v1" + assert component.model_parameters is None + assert ( + caplog.records[0].message == "OpenAI API key is missing. You need to provide an API key to Pipeline.run()." + ) @pytest.mark.unit def test_init_with_parameters(self, caplog): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - callback = lambda x: x - component = ChatGPTGenerator( - api_key="test-api-key", - model_name="gpt-4", - system_prompt="test-system-prompt", - model_parameters={"max_tokens": 10, "some-test-param": "test-params"}, - streaming_callback=callback, - api_base_url="test-base-url", - ) - assert component.system_prompt == "test-system-prompt" - assert component.api_key == "test-api-key" - assert component.model_name == "gpt-4" - assert component.streaming_callback == callback - assert component.api_base_url == "test-base-url" - assert component.model_parameters == {"max_tokens": 10, "some-test-param": "test-params"} + callback = lambda x: x + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="gpt-4", + system_prompt="test-system-prompt", + model_parameters={"max_tokens": 10, "some-test-param": "test-params"}, + streaming_callback=callback, + api_base_url="test-base-url", + ) + assert component.system_prompt == "test-system-prompt" + assert component.api_key == "test-api-key" + assert component.model_name == "gpt-4" + assert component.streaming_callback == callback + assert component.api_base_url == "test-base-url" + assert component.model_parameters == {"max_tokens": 10, "some-test-param": "test-params"} + assert not caplog.records @pytest.mark.unit def test_to_dict_default(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTGenerator() - data = component.to_dict() - assert data == { - "type": "ChatGPTGenerator", - "init_parameters": { - "api_key": None, - "model_name": "gpt-3.5-turbo", - "system_prompt": None, - "model_parameters": None, - "streaming_callback": None, - "api_base_url": "https://api.openai.com/v1", - }, - } + component = ChatGPTGenerator() + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": None, + "model_name": "gpt-3.5-turbo", + "system_prompt": None, + "model_parameters": None, + "streaming_callback": None, + "api_base_url": "https://api.openai.com/v1", + }, + } @pytest.mark.unit def test_to_dict_with_parameters(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTGenerator( - api_key="test-api-key", - model_name="gpt-4", - system_prompt="test-system-prompt", - model_parameters={"max_tokens": 10, "some-test-params": "test-params"}, - streaming_callback=default_streaming_callback, 
- api_base_url="test-base-url", - ) - data = component.to_dict() - assert data == { - "type": "ChatGPTGenerator", - "init_parameters": { - "api_key": "test-api-key", - "model_name": "gpt-4", - "system_prompt": "test-system-prompt", - "model_parameters": {"max_tokens": 10, "some-test-params": "test-params"}, - "api_base_url": "test-base-url", - "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", - }, - } + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="gpt-4", + system_prompt="test-system-prompt", + model_parameters={"max_tokens": 10, "some-test-params": "test-params"}, + streaming_callback=default_streaming_callback, + api_base_url="test-base-url", + ) + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "gpt-4", + "system_prompt": "test-system-prompt", + "model_parameters": {"max_tokens": 10, "some-test-params": "test-params"}, + "api_base_url": "test-base-url", + "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", + }, + } @pytest.mark.unit def test_from_dict(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - data = { - "type": "ChatGPTGenerator", - "init_parameters": { - "api_key": "test-api-key", - "model_name": "gpt-4", - "system_prompt": "test-system-prompt", - "model_parameters": {"max_tokens": 10, "some-test-params": "test-params"}, - "api_base_url": "test-base-url", - "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", - }, - } - component = ChatGPTGenerator.from_dict(data) - assert component.system_prompt == "test-system-prompt" - assert component.api_key == "test-api-key" - assert component.model_name == "gpt-4" - assert component.streaming_callback == default_streaming_callback - assert component.api_base_url == "test-base-url" - assert component.model_parameters == {"max_tokens": 10, "some-test-params": "test-params"} + data = { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "gpt-4", + "system_prompt": "test-system-prompt", + "model_parameters": {"max_tokens": 10, "some-test-params": "test-params"}, + "api_base_url": "test-base-url", + "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", + }, + } + component = ChatGPTGenerator.from_dict(data) + assert component.system_prompt == "test-system-prompt" + assert component.api_key == "test-api-key" + assert component.model_name == "gpt-4" + assert component.streaming_callback == default_streaming_callback + assert component.api_base_url == "test-base-url" + assert component.model_parameters == {"max_tokens": 10, "some-test-params": "test-params"} @pytest.mark.unit def test_run_no_api_key(self): - with patch("haystack.preview.llm_backends.openai.chatgpt.tiktoken") as tiktoken_patch: - component = ChatGPTGenerator() - with pytest.raises(ValueError, match="OpenAI API key is missing. Please provide an API key."): - component.run(prompts=["test"]) + component = ChatGPTGenerator() + with pytest.raises(ValueError, match="OpenAI API key is missing. 
Please provide an API key."): + component.run(prompts=["test"]) @pytest.mark.unit def test_run_no_system_prompt(self): - with patch("haystack.preview.components.generators.openai.chatgpt.ChatGPTBackend") as chatgpt_patch: - chatgpt_patch.return_value.complete.side_effect = lambda chat, **kwargs: ( - [f"{msg.role}: {msg.content}" for msg in chat], - {"some_info": None}, - ) + with patch( + "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" + ) as chatgpt_patch: + chatgpt_patch.side_effect = mock_openai_response component = ChatGPTGenerator(api_key="test-api-key") results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { - "replies": [["user: test-prompt-1"], ["user: test-prompt-2"]], - "metadata": [{"some_info": None}, {"some_info": None}], + "replies": [ + ["response for these messages --> user: test-prompt-1"], + ["response for these messages --> user: test-prompt-2"], + ], + "metadata": [ + [ + { + "model": "gpt-3.5-turbo", + "index": "0", + "finish_reason": "stop", + "usage": {"prompt_tokens": 57, "completion_tokens": 40, "total_tokens": 97}, + } + ], + [ + { + "model": "gpt-3.5-turbo", + "index": "0", + "finish_reason": "stop", + "usage": {"prompt_tokens": 57, "completion_tokens": 40, "total_tokens": 97}, + } + ], + ], } + assert chatgpt_patch.call_count == 2 + chatgpt_patch.assert_any_call( + model="gpt-3.5-turbo", + api_key="test-api-key", + messages=[{"role": "user", "content": "test-prompt-1"}], + stream=False, + ) + chatgpt_patch.assert_any_call( + model="gpt-3.5-turbo", + api_key="test-api-key", + messages=[{"role": "user", "content": "test-prompt-2"}], + stream=False, + ) @pytest.mark.unit def test_run_with_system_prompt(self): - with patch("haystack.preview.components.generators.openai.chatgpt.ChatGPTBackend") as chatgpt_patch: - chatgpt_patch.return_value.complete.side_effect = lambda chat, **kwargs: ( - [f"{msg.role}: {msg.content}" for msg in chat], - {"some_info": None}, - ) + with patch( + "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" + ) as chatgpt_patch: + chatgpt_patch.side_effect = mock_openai_response component = ChatGPTGenerator(api_key="test-api-key", system_prompt="test-system-prompt") results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { "replies": [ - ["system: test-system-prompt", "user: test-prompt-1"], - ["system: test-system-prompt", "user: test-prompt-2"], + ["response for these messages --> system: test-system-prompt - user: test-prompt-1"], + ["response for these messages --> system: test-system-prompt - user: test-prompt-2"], + ], + "metadata": [ + [ + { + "model": "gpt-3.5-turbo", + "index": "0", + "finish_reason": "stop", + "usage": {"prompt_tokens": 57, "completion_tokens": 40, "total_tokens": 97}, + } + ], + [ + { + "model": "gpt-3.5-turbo", + "index": "0", + "finish_reason": "stop", + "usage": {"prompt_tokens": 57, "completion_tokens": 40, "total_tokens": 97}, + } + ], + ], + } + assert chatgpt_patch.call_count == 2 + chatgpt_patch.assert_any_call( + model="gpt-3.5-turbo", + api_key="test-api-key", + messages=[ + {"role": "system", "content": "test-system-prompt"}, + {"role": "user", "content": "test-prompt-1"}, + ], + stream=False, + ) + chatgpt_patch.assert_any_call( + model="gpt-3.5-turbo", + api_key="test-api-key", + messages=[ + {"role": "system", "content": "test-system-prompt"}, + {"role": "user", "content": "test-prompt-2"}, + ], + stream=False, + ) + + @pytest.mark.unit + def test_run_with_parameters(self): 
+ with patch( + "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" + ) as chatgpt_patch: + chatgpt_patch.side_effect = mock_openai_response + component = ChatGPTGenerator(api_key="test-api-key", model_parameters={"max_tokens": 10}) + component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert chatgpt_patch.call_count == 2 + chatgpt_patch.assert_any_call( + model="gpt-3.5-turbo", + api_key="test-api-key", + messages=[{"role": "user", "content": "test-prompt-1"}], + stream=False, + max_tokens=10, + ) + chatgpt_patch.assert_any_call( + model="gpt-3.5-turbo", + api_key="test-api-key", + messages=[{"role": "user", "content": "test-prompt-2"}], + stream=False, + max_tokens=10, + ) + + @pytest.mark.unit + def test_run_stream(self): + with patch( + "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" + ) as chatgpt_patch: + mock_callback = Mock() + mock_callback.side_effect = default_streaming_callback + chatgpt_patch.side_effect = mock_openai_stream_response + component = ChatGPTGenerator( + api_key="test-api-key", system_prompt="test-system-prompt", streaming_callback=mock_callback + ) + results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) + assert results == { + "replies": [ + ["response for these messages --> system: test-system-prompt - user: test-prompt-1 "], + ["response for these messages --> system: test-system-prompt - user: test-prompt-2 "], + ], + "metadata": [ + [{"model": "gpt-3.5-turbo", "index": "0", "finish_reason": "stop"}], + [{"model": "gpt-3.5-turbo", "index": "0", "finish_reason": "stop"}], ], - "metadata": [{"some_info": None}, {"some_info": None}], } + # Calls count: (10 tokens per prompt + 1 token for the role + 1 empty termination token) * 2 prompts + assert mock_callback.call_count == 24 + assert chatgpt_patch.call_count == 2 + chatgpt_patch.assert_any_call( + model="gpt-3.5-turbo", + api_key="test-api-key", + messages=[ + {"role": "system", "content": "test-system-prompt"}, + {"role": "user", "content": "test-prompt-1"}, + ], + stream=True, + ) + chatgpt_patch.assert_any_call( + model="gpt-3.5-turbo", + api_key="test-api-key", + messages=[ + {"role": "system", "content": "test-system-prompt"}, + {"role": "user", "content": "test-prompt-2"}, + ], + stream=True, + ) @pytest.mark.unit def test_check_truncated_answers(caplog): metadata = [ - {"finish_reason": "length"}, + {"finish_reason": "stop"}, {"finish_reason": "content_filter"}, {"finish_reason": "length"}, {"finish_reason": "stop"}, From 28d83f4550f29d8401f3c9e6ade298283f08cb81 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 11:47:56 +0200 Subject: [PATCH 11/37] stray param --- .../generators/openai/test_chatgpt_generator.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index 3f008c1c36..574d4da614 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -9,9 +9,7 @@ from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback, check_truncated_answers -def mock_openai_response( - messages: str, stream: bool, model: str = "gpt-3.5-turbo-0301", **kwargs -) -> openai.ChatCompletion: +def mock_openai_response(messages: str, model: str = "gpt-3.5-turbo-0301", **kwargs) -> openai.ChatCompletion: response = f"response for these 
messages --> {' - '.join(msg['role']+': '+msg['content'] for msg in messages)}" base_dict = { "id": "chatcmpl-7NaPEA6sgX7LnNPyKPbRlsyqLbr5V", @@ -26,9 +24,7 @@ def mock_openai_response( return convert_to_openai_object(deepcopy(base_dict)) -def mock_openai_stream_response( - messages: str, stream: bool, model: str = "gpt-3.5-turbo-0301", **kwargs -) -> openai.ChatCompletion: +def mock_openai_stream_response(messages: str, model: str = "gpt-3.5-turbo-0301", **kwargs) -> openai.ChatCompletion: response = f"response for these messages --> {' - '.join(msg['role']+': '+msg['content'] for msg in messages)}" base_dict = { "id": "chatcmpl-7NaPEA6sgX7LnNPyKPbRlsyqLbr5V", From 30b4bc3d84e7df5f08a4522a78d7befe8583e240 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 11:49:35 +0200 Subject: [PATCH 12/37] move _check_troncated_answers into the class --- .../components/generators/openai/chatgpt.py | 35 +++++++++---------- .../openai/test_chatgpt_generator.py | 28 +++++++-------- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 75fc3a0222..361d77af27 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -229,7 +229,7 @@ def run( all_replies.append(list(replies.values())) all_metadata.append(list(metadata.values())) - check_truncated_answers(list(metadata.values())) + self.check_truncated_answers(list(metadata.values())) else: metadata = [ @@ -244,24 +244,23 @@ def run( replies = [choice.message.content.strip() for choice in completion.choices] all_replies.append(replies) all_metadata.append(metadata) - check_truncated_answers(metadata) + self.check_truncated_answers(metadata) return {"replies": all_replies, "metadata": all_metadata} + def check_truncated_answers(self, metadata: List[List[Dict[str, Any]]]): + """ + Check the `finish_reason` the answers returned by OpenAI completions endpoint. + If the `finish_reason` is `length`, log a warning to the user. -def check_truncated_answers(metadata: List[List[Dict[str, Any]]]): - """ - Check the `finish_reason` the answers returned by OpenAI completions endpoint. - If the `finish_reason` is `length`, log a warning to the user. - - :param result: The result returned from the OpenAI API. - :param payload: The payload sent to the OpenAI API. - """ - truncated_completions = sum(1 for meta in metadata if meta.get("finish_reason") != "stop") - if truncated_completions > 0: - logger.warning( - "%s out of the %s completions have been truncated before reaching a natural stopping point. " - "Increase the max_tokens parameter to allow for longer completions.", - truncated_completions, - len(metadata), - ) + :param result: The result returned from the OpenAI API. + :param payload: The payload sent to the OpenAI API. + """ + truncated_completions = sum(1 for meta in metadata if meta.get("finish_reason") != "stop") + if truncated_completions > 0: + logger.warning( + "%s out of the %s completions have been truncated before reaching a natural stopping point. 
" + "Increase the max_tokens parameter to allow for longer completions.", + truncated_completions, + len(metadata), + ) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index 574d4da614..43072a8c51 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -306,17 +306,17 @@ def test_run_stream(self): stream=True, ) - -@pytest.mark.unit -def test_check_truncated_answers(caplog): - metadata = [ - {"finish_reason": "stop"}, - {"finish_reason": "content_filter"}, - {"finish_reason": "length"}, - {"finish_reason": "stop"}, - ] - check_truncated_answers(metadata) - assert caplog.records[0].message == ( - "2 out of the 4 completions have been truncated before reaching a natural " - "stopping point. Increase the max_tokens parameter to allow for longer completions." - ) + @pytest.mark.unit + def test_check_truncated_answers(caplog): + component = ChatGPTGenerator(api_key="test-api-key") + metadata = [ + {"finish_reason": "stop"}, + {"finish_reason": "content_filter"}, + {"finish_reason": "length"}, + {"finish_reason": "stop"}, + ] + component._check_truncated_answers(metadata) + assert caplog.records[0].message == ( + "2 out of the 4 completions have been truncated before reaching a natural " + "stopping point. Increase the max_tokens parameter to allow for longer completions." + ) From 1b744e4f6cde01dc7303b264a06c6b855a5d9545 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 11:50:10 +0200 Subject: [PATCH 13/37] wrong import --- .../components/generators/openai/test_chatgpt_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index 43072a8c51..9b30ab0cad 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -6,7 +6,7 @@ from openai.util import convert_to_openai_object from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator -from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback, check_truncated_answers +from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback def mock_openai_response(messages: str, model: str = "gpt-3.5-turbo-0301", **kwargs) -> openai.ChatCompletion: From ab0e45c84f4f7a3d660d69c5682d73c67def163f Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 11:50:58 +0200 Subject: [PATCH 14/37] rename function --- haystack/preview/components/generators/openai/chatgpt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 361d77af27..8aa1f43a1e 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -229,7 +229,7 @@ def run( all_replies.append(list(replies.values())) all_metadata.append(list(metadata.values())) - self.check_truncated_answers(list(metadata.values())) + self._check_truncated_answers(list(metadata.values())) else: metadata = [ @@ -244,11 +244,11 @@ def run( replies = [choice.message.content.strip() for choice in completion.choices] all_replies.append(replies) 
all_metadata.append(metadata) - self.check_truncated_answers(metadata) + self._check_truncated_answers(metadata) return {"replies": all_replies, "metadata": all_metadata} - def check_truncated_answers(self, metadata: List[List[Dict[str, Any]]]): + def _check_truncated_answers(self, metadata: List[List[Dict[str, Any]]]): """ Check the `finish_reason` the answers returned by OpenAI completions endpoint. If the `finish_reason` is `length`, log a warning to the user. From fc7dc05eab86a09e48f8fa7baf27df61be55e2a1 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 11:51:42 +0200 Subject: [PATCH 15/37] typo in test --- .../components/generators/openai/test_chatgpt_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index 9b30ab0cad..2e897b58a5 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -307,7 +307,7 @@ def test_run_stream(self): ) @pytest.mark.unit - def test_check_truncated_answers(caplog): + def test_check_truncated_answers(self, caplog): component = ChatGPTGenerator(api_key="test-api-key") metadata = [ {"finish_reason": "stop"}, From 3e43dcde6a0f4dc7042f2b2a03a8006840096f28 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 12:21:13 +0200 Subject: [PATCH 16/37] add openai deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index c58d004b91..08bc665f3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dependencies = [ # Preview "canals==0.7.0", + "openai", # Agent events "events", From c3381e3706dd768dafd22f1a169f45af53f58543 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 12:33:35 +0200 Subject: [PATCH 17/37] mypy --- .../components/generators/openai/chatgpt.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 8aa1f43a1e..4b12b05435 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -19,7 +19,7 @@ TOKENS_PER_MESSAGE_OVERHEAD = 4 -def default_streaming_callback(chunk: Dict[str, Any]) -> Dict[str, Any]: +def default_streaming_callback(chunk): """ Default callback function for streaming responses from OpenAI API. Prints the tokens of the first completion to stdout as soon as they are received and returns the chunk unchanged. 
@@ -209,27 +209,29 @@ def run( stream=streaming_callback is not None, **(self.model_parameters or model_parameters or {}), ) + + replies: List[str] + metadata: List[Dict[str, Any]] if streaming_callback: - replies = {} - metadata = {} + replies_dict = {} + metadata_dict: Dict[str, Dict[str, Any]] = {} for chunk in completion: chunk = streaming_callback(chunk) for choice in chunk.choices: - if choice.index not in replies: - replies[choice.index] = "" - metadata[choice.index] = {} + if choice.index not in replies_dict: + replies_dict[choice.index] = "" + metadata_dict[choice.index] = {} if hasattr(choice.delta, "content"): - replies[choice.index] += choice.delta.content - metadata[choice.index] = { + replies_dict[choice.index] += choice.delta.content + metadata_dict[choice.index] = { "model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason, } - - all_replies.append(list(replies.values())) - all_metadata.append(list(metadata.values())) - self._check_truncated_answers(list(metadata.values())) + all_replies.append(list(replies_dict.values())) + all_metadata.append(list(metadata_dict.values())) + self._check_truncated_answers(list(metadata_dict.values())) else: metadata = [ @@ -248,7 +250,7 @@ def run( return {"replies": all_replies, "metadata": all_metadata} - def _check_truncated_answers(self, metadata: List[List[Dict[str, Any]]]): + def _check_truncated_answers(self, metadata: List[Dict[str, Any]]): """ Check the `finish_reason` the answers returned by OpenAI completions endpoint. If the `finish_reason` is `length`, log a warning to the user. @@ -256,7 +258,7 @@ def _check_truncated_answers(self, metadata: List[List[Dict[str, Any]]]): :param result: The result returned from the OpenAI API. :param payload: The payload sent to the OpenAI API. """ - truncated_completions = sum(1 for meta in metadata if meta.get("finish_reason") != "stop") + truncated_completions = sum([1 for meta in metadata if meta.get("finish_reason") != "stop"]) if truncated_completions > 0: logger.warning( "%s out of the %s completions have been truncated before reaching a natural stopping point. " From 8d6f134e83ca78704921610d07a6557e991aadbc Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 15:28:46 +0200 Subject: [PATCH 18/37] improve system prompt docstring --- .../preview/components/generators/openai/chatgpt.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 4b12b05435..14edba34fd 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -54,7 +54,11 @@ def __init__( :param api_key: The OpenAI API key. :param model_name: The name of the model to use. - :param system_prompt: The prompt to be prepended to the user prompt. + :param system_prompt: An additional message to be sent to the LLM at the beginning of each conversation. + Typically, a conversation is formatted with a system message first, followed by alternating messages from + the 'user' (the "quesries") and the 'assistant' (the "responses"). The system message helps set the behavior + of the assistant. For example, you can modify the personality of the assistant or provide specific + instructions about how it should behave throughout the conversation. :param streaming_callback: A callback function that is called when a new token is received from the stream. 
The callback function should accept two parameters: the token received from the stream and **kwargs. The callback function should return the token to be sent to the stream. If the callback function is not @@ -152,7 +156,11 @@ def run( :param prompts: The prompts to be sent to the generative model. :param api_key: The OpenAI API key. :param model_name: The name of the model to use. - :param system_prompt: The prompt to be prepended to the user prompt. + :param system_prompt: An additional message to be sent to the LLM at the beginning of each conversation. + Typically, a conversation is formatted with a system message first, followed by alternating messages from + the 'user' (the "quesries") and the 'assistant' (the "responses"). The system message helps set the behavior + of the assistant. For example, you can modify the personality of the assistant or provide specific + instructions about how it should behave throughout the conversation. :param streaming_callback: A callback function that is called when a new token is received from the stream. The callback function should accept two parameters: the token received from the stream and **kwargs. The callback function should return the token to be sent to the stream. If the callback function is not From e1652f843f22e1953792a2da7f644d7e8da242cf Mon Sep 17 00:00:00 2001 From: Daria Fokina Date: Tue, 5 Sep 2023 16:11:25 +0200 Subject: [PATCH 19/37] typos update --- haystack/preview/components/generators/openai/chatgpt.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 14edba34fd..e353f41987 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -56,7 +56,7 @@ def __init__( :param model_name: The name of the model to use. :param system_prompt: An additional message to be sent to the LLM at the beginning of each conversation. Typically, a conversation is formatted with a system message first, followed by alternating messages from - the 'user' (the "quesries") and the 'assistant' (the "responses"). The system message helps set the behavior + the 'user' (the "queries") and the 'assistant' (the "responses"). The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. :param streaming_callback: A callback function that is called when a new token is received from the stream. @@ -68,7 +68,7 @@ def __init__( [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported parameters: - `max_tokens`: The maximum number of tokens the output text can have. - - `temperature`: What sampling temperature to use. Higher values means the model will take more risks. + - `temperature`: What sampling temperature to use. Higher values mean the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens @@ -158,7 +158,7 @@ def run( :param model_name: The name of the model to use. :param system_prompt: An additional message to be sent to the LLM at the beginning of each conversation. 
Typically, a conversation is formatted with a system message first, followed by alternating messages from - the 'user' (the "quesries") and the 'assistant' (the "responses"). The system message helps set the behavior + the 'user' (the "queries") and the 'assistant' (the "responses"). The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. :param streaming_callback: A callback function that is called when a new token is received from the stream. @@ -170,7 +170,7 @@ def run( [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported parameters: - `max_tokens`: The maximum number of tokens the output text can have. - - `temperature`: What sampling temperature to use. Higher values means the model will take more risks. + - `temperature`: What sampling temperature to use. Higher values mean the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens From 2a256b208d3f01c1d173dbdc628e09af2cf995a3 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 16:23:40 +0200 Subject: [PATCH 20/37] Update haystack/preview/components/generators/openai/chatgpt.py --- haystack/preview/components/generators/openai/chatgpt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index e353f41987..8888082d16 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -260,7 +260,7 @@ def run( def _check_truncated_answers(self, metadata: List[Dict[str, Any]]): """ - Check the `finish_reason` the answers returned by OpenAI completions endpoint. + Check the `finish_reason` returned with the OpenAI completions. If the `finish_reason` is `length`, log a warning to the user. :param result: The result returned from the OpenAI API. From 7178f2353ce05f003843f2ef718cfdcf5243912a Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 16:32:22 +0200 Subject: [PATCH 21/37] pylint --- haystack/preview/components/generators/openai/chatgpt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 14edba34fd..37e599a2b9 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -266,7 +266,7 @@ def _check_truncated_answers(self, metadata: List[Dict[str, Any]]): :param result: The result returned from the OpenAI API. :param payload: The payload sent to the OpenAI API. """ - truncated_completions = sum([1 for meta in metadata if meta.get("finish_reason") != "stop"]) + truncated_completions = sum(1 for meta in metadata if meta.get("finish_reason") != "stop") if truncated_completions > 0: logger.warning( "%s out of the %s completions have been truncated before reaching a natural stopping point. 
" From 155485f7dc1254fbde7eee58d2ac66f02ef767ce Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 17:43:23 +0200 Subject: [PATCH 22/37] Update haystack/preview/components/generators/openai/chatgpt.py Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> --- haystack/preview/components/generators/openai/chatgpt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 043e69da06..b6da384cb3 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -9,7 +9,6 @@ from haystack.preview import component, default_from_dict, default_to_dict, DeserializationError -# from haystack.preview.llm_backends.openai.chatgpt import ChatGPTBackend from haystack.preview.dataclasses.chat_message import ChatMessage From b2187c357bc8010f6c74ec2e493c78541c5ddb0c Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 17:56:10 +0200 Subject: [PATCH 23/37] Update haystack/preview/components/generators/openai/chatgpt.py Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> --- haystack/preview/components/generators/openai/chatgpt.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index b6da384cb3..935e05770f 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -98,13 +98,11 @@ def to_dict(self) -> Dict[str, Any]: Serialize this component to a dictionary. """ if self.streaming_callback: - module = sys.modules.get(self.streaming_callback.__module__) - if not module: - raise ValueError("Could not locate the import module.") - if module == builtins: + module = self.streaming_callback.__module__ + if module == "builtins": callback_name = self.streaming_callback.__name__ else: - callback_name = f"{module.__name__}.{self.streaming_callback.__name__}" + callback_name = f"{module}.{self.streaming_callback.__name__}" else: callback_name = None From ed08e342ecf705c729e455d5775d9b2e082fca46 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 17:57:12 +0200 Subject: [PATCH 24/37] Update haystack/preview/components/generators/openai/chatgpt.py Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> --- haystack/preview/components/generators/openai/chatgpt.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 935e05770f..5b497128f6 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -140,12 +140,6 @@ def from_dict(cls, data: Dict[str, Any]) -> "ChatGPTGenerator": def run( self, prompts: List[str], - api_key: Optional[str] = None, - model_name: str = "gpt-3.5-turbo", - system_prompt: Optional[str] = None, - model_parameters: Optional[Dict[str, Any]] = None, - streaming_callback: Optional[Callable] = None, - api_base_url: str = "https://api.openai.com/v1", ): """ Queries the LLM with the prompts to produce replies. 
From cc0bb7d8ed1c81248df2fe500a4b9b5e96be3555 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 18:46:07 +0200 Subject: [PATCH 25/37] review feedback --- .../components/generators/openai/chatgpt.py | 38 ++++++++----------- haystack/preview/dataclasses/chat_message.py | 7 ---- 2 files changed, 15 insertions(+), 30 deletions(-) delete mode 100644 haystack/preview/dataclasses/chat_message.py diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 5b497128f6..58f71d4874 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -1,21 +1,21 @@ from typing import Optional, List, Callable, Dict, Any import sys -import builtins import logging -from dataclasses import asdict +from dataclasses import dataclass, asdict import openai from haystack.preview import component, default_from_dict, default_to_dict, DeserializationError -from haystack.preview.dataclasses.chat_message import ChatMessage - logger = logging.getLogger(__name__) -TOKENS_PER_MESSAGE_OVERHEAD = 4 +@dataclass +class _ChatMessage: + content: str + role: str def default_streaming_callback(chunk): @@ -37,16 +37,14 @@ class ChatGPTGenerator: See [OpenAI ChatGPT API](https://platform.openai.com/docs/guides/chat) for more details. """ - # TODO support function calling! - def __init__( self, - api_key: Optional[str] = None, + api_key: str, model_name: str = "gpt-3.5-turbo", system_prompt: Optional[str] = None, - model_parameters: Optional[Dict[str, Any]] = None, streaming_callback: Optional[Callable] = None, api_base_url: str = "https://api.openai.com/v1", + **kwargs, ): """ Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. @@ -63,9 +61,9 @@ def __init__( The callback function should return the token to be sent to the stream. If the callback function is not provided, the token is printed to stdout. :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. - :param model_parameters: A dictionary of parameters to use for the model. See OpenAI - [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported - parameters: + :param kwargs: Other parameters to use for the model. These parameters are all sent directly to the OpenAI + endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. + Some of the supported parameters: - `max_tokens`: The maximum number of tokens the output text can have. - `temperature`: What sampling temperature to use. Higher values mean the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. @@ -83,13 +81,10 @@ def __init__( values are the bias to add to that token. - `openai_organization`: The OpenAI organization ID. """ - if not api_key: - logger.warning("OpenAI API key is missing. 
You need to provide an API key to Pipeline.run().") - self.api_key = api_key self.model_name = model_name self.system_prompt = system_prompt - self.model_parameters = model_parameters + self.model_parameters = kwargs self.streaming_callback = streaming_callback self.api_base_url = api_base_url @@ -110,10 +105,10 @@ def to_dict(self) -> Dict[str, Any]: self, api_key=self.api_key, model_name=self.model_name, - model_parameters=self.model_parameters, system_prompt=self.system_prompt, streaming_callback=callback_name, api_base_url=self.api_base_url, + **self.model_parameters, ) @classmethod @@ -137,10 +132,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "ChatGPTGenerator": return default_from_dict(cls, data) @component.output_types(replies=List[List[str]], metadata=List[Dict[str, Any]]) - def run( - self, - prompts: List[str], - ): + def run(self, prompts: List[str]): """ Queries the LLM with the prompts to produce replies. @@ -190,10 +182,10 @@ def run( api_base_url = api_base_url or self.api_base_url if system_prompt: - system_message = ChatMessage(content=system_prompt, role="system") + system_message = _ChatMessage(content=system_prompt, role="system") chats = [] for prompt in prompts: - message = ChatMessage(content=prompt, role="user") + message = _ChatMessage(content=prompt, role="user") if system_prompt: chats.append([system_message, message]) else: diff --git a/haystack/preview/dataclasses/chat_message.py b/haystack/preview/dataclasses/chat_message.py deleted file mode 100644 index ca20f905f3..0000000000 --- a/haystack/preview/dataclasses/chat_message.py +++ /dev/null @@ -1,7 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class ChatMessage: - content: str - role: str From c58ab268a2c85aece6c08c653dc5cbdf6d2ea498 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 18:56:50 +0200 Subject: [PATCH 26/37] fix tests --- .../components/generators/openai/chatgpt.py | 24 ++----- .../openai/test_chatgpt_generator.py | 68 ++++++++++++------- 2 files changed, 49 insertions(+), 43 deletions(-) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 58f71d4874..5c6753695b 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -171,23 +171,11 @@ def run(self, prompts: List[str]): See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. """ - api_key = api_key if api_key is not None else self.api_key - if not api_key: - raise ValueError("OpenAI API key is missing. 
Please provide an API key.") - - model_name = model_name or self.model_name - system_prompt = system_prompt if system_prompt is not None else self.system_prompt - model_parameters = model_parameters if model_parameters is not None else self.model_parameters - streaming_callback = streaming_callback or self.streaming_callback - api_base_url = api_base_url or self.api_base_url - - if system_prompt: - system_message = _ChatMessage(content=system_prompt, role="system") chats = [] for prompt in prompts: message = _ChatMessage(content=prompt, role="user") - if system_prompt: - chats.append([system_message, message]) + if self.system_prompt: + chats.append([_ChatMessage(content=self.system_prompt, role="system"), message]) else: chats.append([message]) @@ -197,17 +185,17 @@ def run(self, prompts: List[str]): model=self.model_name, api_key=self.api_key, messages=[asdict(message) for message in chat], - stream=streaming_callback is not None, - **(self.model_parameters or model_parameters or {}), + stream=self.streaming_callback is not None, + **self.model_parameters, ) replies: List[str] metadata: List[Dict[str, Any]] - if streaming_callback: + if self.streaming_callback: replies_dict = {} metadata_dict: Dict[str, Dict[str, Any]] = {} for chunk in completion: - chunk = streaming_callback(chunk) + chunk = self.streaming_callback(chunk) for choice in chunk.choices: if choice.index not in replies_dict: replies_dict[choice.index] = "" diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index 2e897b58a5..860ab2bf25 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -43,26 +43,24 @@ def mock_openai_stream_response(messages: str, model: str = "gpt-3.5-turbo-0301" class TestChatGPTGenerator: @pytest.mark.unit - def test_init_default(self, caplog): - component = ChatGPTGenerator() + def test_init_default(self): + component = ChatGPTGenerator(api_key="test-api-key") assert component.system_prompt is None - assert component.api_key is None + assert component.api_key == "test-api-key" assert component.model_name == "gpt-3.5-turbo" assert component.streaming_callback is None assert component.api_base_url == "https://api.openai.com/v1" - assert component.model_parameters is None - assert ( - caplog.records[0].message == "OpenAI API key is missing. You need to provide an API key to Pipeline.run()." 
- ) + assert component.model_parameters == {} @pytest.mark.unit - def test_init_with_parameters(self, caplog): + def test_init_with_parameters(self): callback = lambda x: x component = ChatGPTGenerator( api_key="test-api-key", model_name="gpt-4", system_prompt="test-system-prompt", - model_parameters={"max_tokens": 10, "some-test-param": "test-params"}, + max_tokens=10, + some_test_param="test-params", streaming_callback=callback, api_base_url="test-base-url", ) @@ -71,20 +69,18 @@ def test_init_with_parameters(self, caplog): assert component.model_name == "gpt-4" assert component.streaming_callback == callback assert component.api_base_url == "test-base-url" - assert component.model_parameters == {"max_tokens": 10, "some-test-param": "test-params"} - assert not caplog.records + assert component.model_parameters == {"max_tokens": 10, "some_test_param": "test-params"} @pytest.mark.unit def test_to_dict_default(self): - component = ChatGPTGenerator() + component = ChatGPTGenerator(api_key="test-api-key") data = component.to_dict() assert data == { "type": "ChatGPTGenerator", "init_parameters": { - "api_key": None, + "api_key": "test-api-key", "model_name": "gpt-3.5-turbo", "system_prompt": None, - "model_parameters": None, "streaming_callback": None, "api_base_url": "https://api.openai.com/v1", }, @@ -96,7 +92,8 @@ def test_to_dict_with_parameters(self): api_key="test-api-key", model_name="gpt-4", system_prompt="test-system-prompt", - model_parameters={"max_tokens": 10, "some-test-params": "test-params"}, + max_tokens=10, + some_test_param="test-params", streaming_callback=default_streaming_callback, api_base_url="test-base-url", ) @@ -107,12 +104,38 @@ def test_to_dict_with_parameters(self): "api_key": "test-api-key", "model_name": "gpt-4", "system_prompt": "test-system-prompt", - "model_parameters": {"max_tokens": 10, "some-test-params": "test-params"}, + "max_tokens": 10, + "some_test_param": "test-params", "api_base_url": "test-base-url", "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", }, } + @pytest.mark.unit + def test_to_dict_with_lambda_streaming_callback(self): + component = ChatGPTGenerator( + api_key="test-api-key", + model_name="gpt-4", + system_prompt="test-system-prompt", + max_tokens=10, + some_test_param="test-params", + streaming_callback=lambda x: x, + api_base_url="test-base-url", + ) + data = component.to_dict() + assert data == { + "type": "ChatGPTGenerator", + "init_parameters": { + "api_key": "test-api-key", + "model_name": "gpt-4", + "system_prompt": "test-system-prompt", + "max_tokens": 10, + "some_test_param": "test-params", + "api_base_url": "test-base-url", + "streaming_callback": "test_chatgpt_generator.", + }, + } + @pytest.mark.unit def test_from_dict(self): data = { @@ -121,7 +144,8 @@ def test_from_dict(self): "api_key": "test-api-key", "model_name": "gpt-4", "system_prompt": "test-system-prompt", - "model_parameters": {"max_tokens": 10, "some-test-params": "test-params"}, + "max_tokens": 10, + "some_test_param": "test-params", "api_base_url": "test-base-url", "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", }, @@ -132,13 +156,7 @@ def test_from_dict(self): assert component.model_name == "gpt-4" assert component.streaming_callback == default_streaming_callback assert component.api_base_url == "test-base-url" - assert component.model_parameters == {"max_tokens": 10, "some-test-params": "test-params"} - - @pytest.mark.unit - def 
test_run_no_api_key(self): - component = ChatGPTGenerator() - with pytest.raises(ValueError, match="OpenAI API key is missing. Please provide an API key."): - component.run(prompts=["test"]) + assert component.model_parameters == {"max_tokens": 10, "some_test_param": "test-params"} @pytest.mark.unit def test_run_no_system_prompt(self): @@ -244,7 +262,7 @@ def test_run_with_parameters(self): "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" ) as chatgpt_patch: chatgpt_patch.side_effect = mock_openai_response - component = ChatGPTGenerator(api_key="test-api-key", model_parameters={"max_tokens": 10}) + component = ChatGPTGenerator(api_key="test-api-key", max_tokens=10) component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert chatgpt_patch.call_count == 2 chatgpt_patch.assert_any_call( From 835fd0ca53a19aacbcdd7c7386dd2b77b52af8f7 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 19:02:41 +0200 Subject: [PATCH 27/37] freview feedback --- .../openai/test_chatgpt_generator.py | 48 ++++++++----------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index 860ab2bf25..9c2249ceb0 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -160,10 +160,8 @@ def test_from_dict(self): @pytest.mark.unit def test_run_no_system_prompt(self): - with patch( - "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" - ) as chatgpt_patch: - chatgpt_patch.side_effect = mock_openai_response + with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: + chatgpt_patch.create.side_effect = mock_openai_response component = ChatGPTGenerator(api_key="test-api-key") results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { @@ -190,14 +188,14 @@ def test_run_no_system_prompt(self): ], ], } - assert chatgpt_patch.call_count == 2 - chatgpt_patch.assert_any_call( + assert chatgpt_patch.create.call_count == 2 + chatgpt_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[{"role": "user", "content": "test-prompt-1"}], stream=False, ) - chatgpt_patch.assert_any_call( + chatgpt_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[{"role": "user", "content": "test-prompt-2"}], @@ -206,10 +204,8 @@ def test_run_no_system_prompt(self): @pytest.mark.unit def test_run_with_system_prompt(self): - with patch( - "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" - ) as chatgpt_patch: - chatgpt_patch.side_effect = mock_openai_response + with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: + chatgpt_patch.create.side_effect = mock_openai_response component = ChatGPTGenerator(api_key="test-api-key", system_prompt="test-system-prompt") results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { @@ -236,8 +232,8 @@ def test_run_with_system_prompt(self): ], ], } - assert chatgpt_patch.call_count == 2 - chatgpt_patch.assert_any_call( + assert chatgpt_patch.create.call_count == 2 + chatgpt_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[ @@ -246,7 +242,7 @@ def test_run_with_system_prompt(self): ], stream=False, ) - 
chatgpt_patch.assert_any_call( + chatgpt_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[ @@ -258,21 +254,19 @@ def test_run_with_system_prompt(self): @pytest.mark.unit def test_run_with_parameters(self): - with patch( - "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" - ) as chatgpt_patch: - chatgpt_patch.side_effect = mock_openai_response + with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: + chatgpt_patch.create.side_effect = mock_openai_response component = ChatGPTGenerator(api_key="test-api-key", max_tokens=10) component.run(prompts=["test-prompt-1", "test-prompt-2"]) - assert chatgpt_patch.call_count == 2 - chatgpt_patch.assert_any_call( + assert chatgpt_patch.create.call_count == 2 + chatgpt_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[{"role": "user", "content": "test-prompt-1"}], stream=False, max_tokens=10, ) - chatgpt_patch.assert_any_call( + chatgpt_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[{"role": "user", "content": "test-prompt-2"}], @@ -282,12 +276,10 @@ def test_run_with_parameters(self): @pytest.mark.unit def test_run_stream(self): - with patch( - "haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion.create" - ) as chatgpt_patch: + with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: mock_callback = Mock() mock_callback.side_effect = default_streaming_callback - chatgpt_patch.side_effect = mock_openai_stream_response + chatgpt_patch.create.side_effect = mock_openai_stream_response component = ChatGPTGenerator( api_key="test-api-key", system_prompt="test-system-prompt", streaming_callback=mock_callback ) @@ -304,8 +296,8 @@ def test_run_stream(self): } # Calls count: (10 tokens per prompt + 1 token for the role + 1 empty termination token) * 2 prompts assert mock_callback.call_count == 24 - assert chatgpt_patch.call_count == 2 - chatgpt_patch.assert_any_call( + assert chatgpt_patch.create.call_count == 2 + chatgpt_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[ @@ -314,7 +306,7 @@ def test_run_stream(self): ], stream=True, ) - chatgpt_patch.assert_any_call( + chatgpt_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[ From 0eb43f954efdc480c63b77bafc67c82c0c1457e0 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Sep 2023 19:09:27 +0200 Subject: [PATCH 28/37] reno --- releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml | 2 -- releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml create mode 100644 releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml diff --git a/releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml b/releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml deleted file mode 100644 index 31334d5c8d..0000000000 --- a/releasenotes/notes/chatgpt-llm-backend-d043532654efe684.yaml +++ /dev/null @@ -1,2 +0,0 @@ -preview: - - Introduce `ChatGPTBackend`, a class that will be used by LLM components to talk to OpenAI Chat models like ChatGPT and GPT4. Note that ChatGPTBackend itself is NOT a component. 
diff --git a/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml b/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml new file mode 100644 index 0000000000..baaf359496 --- /dev/null +++ b/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml @@ -0,0 +1,2 @@ +preview: + - Introduce `ChatGPTGenerator`, a class that can generate completions using OpenAI Chat models like ChatGPT and GPT4. From e8d92ddc1db00980fbfc0b2716a5e7046936afcc Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 10:17:43 +0200 Subject: [PATCH 29/37] remove tenacity mock --- test/preview/conftest.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/preview/conftest.py b/test/preview/conftest.py index 377370bccf..d8882ea230 100644 --- a/test/preview/conftest.py +++ b/test/preview/conftest.py @@ -11,12 +11,3 @@ def mock_tokenizer(): tokenizer.encode = lambda text: text.split() tokenizer.decode = lambda tokens: " ".join(tokens) return tokenizer - - -@pytest.fixture(autouse=True) -def tenacity_wait(): - """ - Mocks tenacity's wait function to speed up tests. - """ - with patch("tenacity.nap.time"): - yield From 0aeb875cfc880d476d20bf1edcb57e3adcef6760 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 10:42:38 +0200 Subject: [PATCH 30/37] gpt35generator --- .../components/test_chatgpt_generator.py | 6 ++--- .../components/generators/openai/chatgpt.py | 4 ++-- .../openai/test_chatgpt_generator.py | 24 +++++++++---------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/e2e/preview/components/test_chatgpt_generator.py b/e2e/preview/components/test_chatgpt_generator.py index 2a4e09c45e..3816c1cc8d 100644 --- a/e2e/preview/components/test_chatgpt_generator.py +++ b/e2e/preview/components/test_chatgpt_generator.py @@ -1,6 +1,6 @@ import os import pytest -from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator +from haystack.preview.components.generators.openai.chatgpt import GPT35Generator @pytest.mark.skipif( @@ -8,7 +8,7 @@ reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", ) def test_chatgpt_generator_run(): - component = ChatGPTGenerator(api_key=os.environ.get("OPENAI_API_KEY")) + component = GPT35Generator(api_key=os.environ.get("OPENAI_API_KEY")) results = component.run( prompts=["What's the capital of France?", "What's the capital of Germany?"], model_parameters={"n": 1} ) @@ -42,7 +42,7 @@ def __call__(self, chunk): return chunk callback = Callback() - component = ChatGPTGenerator(os.environ.get("OPENAI_API_KEY"), streaming_callback=callback) + component = GPT35Generator(os.environ.get("OPENAI_API_KEY"), streaming_callback=callback) results = component.run( prompts=["What's the capital of France?", "What's the capital of Germany?"], model_parameters={"n": 1} ) diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/chatgpt.py index 5c6753695b..ad8d450e8f 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/chatgpt.py @@ -29,7 +29,7 @@ def default_streaming_callback(chunk): @component -class ChatGPTGenerator: +class GPT35Generator: """ ChatGPT LLM Generator. @@ -112,7 +112,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "ChatGPTGenerator": + def from_dict(cls, data: Dict[str, Any]) -> "GPT35Generator": """ Deserialize this component from a dictionary. 
""" diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index 9c2249ceb0..c24291e676 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -5,7 +5,7 @@ import openai from openai.util import convert_to_openai_object -from haystack.preview.components.generators.openai.chatgpt import ChatGPTGenerator +from haystack.preview.components.generators.openai.chatgpt import GPT35Generator from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback @@ -44,7 +44,7 @@ def mock_openai_stream_response(messages: str, model: str = "gpt-3.5-turbo-0301" class TestChatGPTGenerator: @pytest.mark.unit def test_init_default(self): - component = ChatGPTGenerator(api_key="test-api-key") + component = GPT35Generator(api_key="test-api-key") assert component.system_prompt is None assert component.api_key == "test-api-key" assert component.model_name == "gpt-3.5-turbo" @@ -55,7 +55,7 @@ def test_init_default(self): @pytest.mark.unit def test_init_with_parameters(self): callback = lambda x: x - component = ChatGPTGenerator( + component = GPT35Generator( api_key="test-api-key", model_name="gpt-4", system_prompt="test-system-prompt", @@ -73,7 +73,7 @@ def test_init_with_parameters(self): @pytest.mark.unit def test_to_dict_default(self): - component = ChatGPTGenerator(api_key="test-api-key") + component = GPT35Generator(api_key="test-api-key") data = component.to_dict() assert data == { "type": "ChatGPTGenerator", @@ -88,7 +88,7 @@ def test_to_dict_default(self): @pytest.mark.unit def test_to_dict_with_parameters(self): - component = ChatGPTGenerator( + component = GPT35Generator( api_key="test-api-key", model_name="gpt-4", system_prompt="test-system-prompt", @@ -113,7 +113,7 @@ def test_to_dict_with_parameters(self): @pytest.mark.unit def test_to_dict_with_lambda_streaming_callback(self): - component = ChatGPTGenerator( + component = GPT35Generator( api_key="test-api-key", model_name="gpt-4", system_prompt="test-system-prompt", @@ -150,7 +150,7 @@ def test_from_dict(self): "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", }, } - component = ChatGPTGenerator.from_dict(data) + component = GPT35Generator.from_dict(data) assert component.system_prompt == "test-system-prompt" assert component.api_key == "test-api-key" assert component.model_name == "gpt-4" @@ -162,7 +162,7 @@ def test_from_dict(self): def test_run_no_system_prompt(self): with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: chatgpt_patch.create.side_effect = mock_openai_response - component = ChatGPTGenerator(api_key="test-api-key") + component = GPT35Generator(api_key="test-api-key") results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { "replies": [ @@ -206,7 +206,7 @@ def test_run_no_system_prompt(self): def test_run_with_system_prompt(self): with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: chatgpt_patch.create.side_effect = mock_openai_response - component = ChatGPTGenerator(api_key="test-api-key", system_prompt="test-system-prompt") + component = GPT35Generator(api_key="test-api-key", system_prompt="test-system-prompt") results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { "replies": [ @@ 
-256,7 +256,7 @@ def test_run_with_system_prompt(self): def test_run_with_parameters(self): with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: chatgpt_patch.create.side_effect = mock_openai_response - component = ChatGPTGenerator(api_key="test-api-key", max_tokens=10) + component = GPT35Generator(api_key="test-api-key", max_tokens=10) component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert chatgpt_patch.create.call_count == 2 chatgpt_patch.create.assert_any_call( @@ -280,7 +280,7 @@ def test_run_stream(self): mock_callback = Mock() mock_callback.side_effect = default_streaming_callback chatgpt_patch.create.side_effect = mock_openai_stream_response - component = ChatGPTGenerator( + component = GPT35Generator( api_key="test-api-key", system_prompt="test-system-prompt", streaming_callback=mock_callback ) results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) @@ -318,7 +318,7 @@ def test_run_stream(self): @pytest.mark.unit def test_check_truncated_answers(self, caplog): - component = ChatGPTGenerator(api_key="test-api-key") + component = GPT35Generator(api_key="test-api-key") metadata = [ {"finish_reason": "stop"}, {"finish_reason": "content_filter"}, From 9167e05e11470cba1bdfb8a2a6177d93f8d30532 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 11:30:26 +0200 Subject: [PATCH 31/37] fix naming --- .../generators/openai/test_chatgpt_generator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_chatgpt_generator.py index c24291e676..390cd61f8d 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_chatgpt_generator.py @@ -41,7 +41,7 @@ def mock_openai_stream_response(messages: str, model: str = "gpt-3.5-turbo-0301" yield convert_to_openai_object(base_dict) -class TestChatGPTGenerator: +class TestGPT35Generator: @pytest.mark.unit def test_init_default(self): component = GPT35Generator(api_key="test-api-key") @@ -76,7 +76,7 @@ def test_to_dict_default(self): component = GPT35Generator(api_key="test-api-key") data = component.to_dict() assert data == { - "type": "ChatGPTGenerator", + "type": "GPT35Generator", "init_parameters": { "api_key": "test-api-key", "model_name": "gpt-3.5-turbo", @@ -99,7 +99,7 @@ def test_to_dict_with_parameters(self): ) data = component.to_dict() assert data == { - "type": "ChatGPTGenerator", + "type": "GPT35Generator", "init_parameters": { "api_key": "test-api-key", "model_name": "gpt-4", @@ -124,7 +124,7 @@ def test_to_dict_with_lambda_streaming_callback(self): ) data = component.to_dict() assert data == { - "type": "ChatGPTGenerator", + "type": "GPT35Generator", "init_parameters": { "api_key": "test-api-key", "model_name": "gpt-4", @@ -139,7 +139,7 @@ def test_to_dict_with_lambda_streaming_callback(self): @pytest.mark.unit def test_from_dict(self): data = { - "type": "ChatGPTGenerator", + "type": "GPT35Generator", "init_parameters": { "api_key": "test-api-key", "model_name": "gpt-4", From 941cc6676ca95f3f62e492fb6ac3d22a6006cbb4 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 15:12:44 +0200 Subject: [PATCH 32/37] remove stray references to chatgpt --- ...t_generator.py => test_gpt35_generator.py} | 8 ++- .../openai/{chatgpt.py => gpt35.py} | 8 +-- ...t_generator.py => test_gpt35_generator.py} | 50 +++++++++---------- 3 files changed, 32 
insertions(+), 34 deletions(-) rename e2e/preview/components/{test_chatgpt_generator.py => test_gpt35_generator.py} (92%) rename haystack/preview/components/generators/openai/{chatgpt.py => gpt35.py} (97%) rename test/preview/components/generators/openai/{test_chatgpt_generator.py => test_gpt35_generator.py} (89%) diff --git a/e2e/preview/components/test_chatgpt_generator.py b/e2e/preview/components/test_gpt35_generator.py similarity index 92% rename from e2e/preview/components/test_chatgpt_generator.py rename to e2e/preview/components/test_gpt35_generator.py index 3816c1cc8d..9c49d2a8c8 100644 --- a/e2e/preview/components/test_chatgpt_generator.py +++ b/e2e/preview/components/test_gpt35_generator.py @@ -1,13 +1,13 @@ import os import pytest -from haystack.preview.components.generators.openai.chatgpt import GPT35Generator +from haystack.preview.components.generators.openai.gpt35 import GPT35Generator @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", ) -def test_chatgpt_generator_run(): +def test_gpt35_generator_run(): component = GPT35Generator(api_key=os.environ.get("OPENAI_API_KEY")) results = component.run( prompts=["What's the capital of France?", "What's the capital of Germany?"], model_parameters={"n": 1} @@ -32,7 +32,7 @@ def test_chatgpt_generator_run(): not os.environ.get("OPENAI_API_KEY", None), reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", ) -def test_chatgpt_generator_run_streaming(): +def test_gpt35_generator_run_streaming(): class Callback: def __init__(self): self.responses = "" @@ -58,8 +58,6 @@ def __call__(self, chunk): assert len(results["metadata"]) == 2 assert len(results["metadata"][0]) == 1 - print(results["metadata"][0][0]) - assert "gpt-3.5-turbo" in results["metadata"][0][0]["model"] assert "stop" == results["metadata"][0][0]["finish_reason"] assert len(results["metadata"][1]) == 1 diff --git a/haystack/preview/components/generators/openai/chatgpt.py b/haystack/preview/components/generators/openai/gpt35.py similarity index 97% rename from haystack/preview/components/generators/openai/chatgpt.py rename to haystack/preview/components/generators/openai/gpt35.py index ad8d450e8f..402dccde77 100644 --- a/haystack/preview/components/generators/openai/chatgpt.py +++ b/haystack/preview/components/generators/openai/gpt35.py @@ -31,10 +31,10 @@ def default_streaming_callback(chunk): @component class GPT35Generator: """ - ChatGPT LLM Generator. + LLM Generator compatible with GPT3.5 (ChatGPT) large language models. - Queries ChatGPT using OpenAI's GPT-3 ChatGPT API. Invocations are made using REST API. - See [OpenAI ChatGPT API](https://platform.openai.com/docs/guides/chat) for more details. + Queries the LLM using OpenAI's API. Invocations are made using OpenAI SDK ('openai' package) + See [OpenAI GPT3.5 API](https://platform.openai.com/docs/guides/chat) for more details. """ def __init__( @@ -47,7 +47,7 @@ def __init__( **kwargs, ): """ - Creates an instance of ChatGPTGenerator for OpenAI's GPT-3.5 model. + Creates an instance of GPT35Generator for OpenAI's GPT-3.5 model. :param api_key: The OpenAI API key. :param model_name: The name of the model to use. 
diff --git a/test/preview/components/generators/openai/test_chatgpt_generator.py b/test/preview/components/generators/openai/test_gpt35_generator.py similarity index 89% rename from test/preview/components/generators/openai/test_chatgpt_generator.py rename to test/preview/components/generators/openai/test_gpt35_generator.py index 390cd61f8d..c4bc9c512f 100644 --- a/test/preview/components/generators/openai/test_chatgpt_generator.py +++ b/test/preview/components/generators/openai/test_gpt35_generator.py @@ -5,8 +5,8 @@ import openai from openai.util import convert_to_openai_object -from haystack.preview.components.generators.openai.chatgpt import GPT35Generator -from haystack.preview.components.generators.openai.chatgpt import default_streaming_callback +from haystack.preview.components.generators.openai.gpt35 import GPT35Generator +from haystack.preview.components.generators.openai.gpt35 import default_streaming_callback def mock_openai_response(messages: str, model: str = "gpt-3.5-turbo-0301", **kwargs) -> openai.ChatCompletion: @@ -107,7 +107,7 @@ def test_to_dict_with_parameters(self): "max_tokens": 10, "some_test_param": "test-params", "api_base_url": "test-base-url", - "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", + "streaming_callback": "haystack.preview.components.generators.openai.gpt35.default_streaming_callback", }, } @@ -132,7 +132,7 @@ def test_to_dict_with_lambda_streaming_callback(self): "max_tokens": 10, "some_test_param": "test-params", "api_base_url": "test-base-url", - "streaming_callback": "test_chatgpt_generator.", + "streaming_callback": "test_gpt35_generator.", }, } @@ -147,7 +147,7 @@ def test_from_dict(self): "max_tokens": 10, "some_test_param": "test-params", "api_base_url": "test-base-url", - "streaming_callback": "haystack.preview.components.generators.openai.chatgpt.default_streaming_callback", + "streaming_callback": "haystack.preview.components.generators.openai.gpt35.default_streaming_callback", }, } component = GPT35Generator.from_dict(data) @@ -160,8 +160,8 @@ def test_from_dict(self): @pytest.mark.unit def test_run_no_system_prompt(self): - with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: - chatgpt_patch.create.side_effect = mock_openai_response + with patch("haystack.preview.components.generators.openai.gpt35.openai.ChatCompletion") as gpt35_patch: + gpt35_patch.create.side_effect = mock_openai_response component = GPT35Generator(api_key="test-api-key") results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { @@ -188,14 +188,14 @@ def test_run_no_system_prompt(self): ], ], } - assert chatgpt_patch.create.call_count == 2 - chatgpt_patch.create.assert_any_call( + assert gpt35_patch.create.call_count == 2 + gpt35_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[{"role": "user", "content": "test-prompt-1"}], stream=False, ) - chatgpt_patch.create.assert_any_call( + gpt35_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[{"role": "user", "content": "test-prompt-2"}], @@ -204,8 +204,8 @@ def test_run_no_system_prompt(self): @pytest.mark.unit def test_run_with_system_prompt(self): - with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: - chatgpt_patch.create.side_effect = mock_openai_response + with patch("haystack.preview.components.generators.openai.gpt35.openai.ChatCompletion") as 
gpt35_patch: + gpt35_patch.create.side_effect = mock_openai_response component = GPT35Generator(api_key="test-api-key", system_prompt="test-system-prompt") results = component.run(prompts=["test-prompt-1", "test-prompt-2"]) assert results == { @@ -232,8 +232,8 @@ def test_run_with_system_prompt(self): ], ], } - assert chatgpt_patch.create.call_count == 2 - chatgpt_patch.create.assert_any_call( + assert gpt35_patch.create.call_count == 2 + gpt35_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[ @@ -242,7 +242,7 @@ def test_run_with_system_prompt(self): ], stream=False, ) - chatgpt_patch.create.assert_any_call( + gpt35_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[ @@ -254,19 +254,19 @@ def test_run_with_system_prompt(self): @pytest.mark.unit def test_run_with_parameters(self): - with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: - chatgpt_patch.create.side_effect = mock_openai_response + with patch("haystack.preview.components.generators.openai.gpt35.openai.ChatCompletion") as gpt35_patch: + gpt35_patch.create.side_effect = mock_openai_response component = GPT35Generator(api_key="test-api-key", max_tokens=10) component.run(prompts=["test-prompt-1", "test-prompt-2"]) - assert chatgpt_patch.create.call_count == 2 - chatgpt_patch.create.assert_any_call( + assert gpt35_patch.create.call_count == 2 + gpt35_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[{"role": "user", "content": "test-prompt-1"}], stream=False, max_tokens=10, ) - chatgpt_patch.create.assert_any_call( + gpt35_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[{"role": "user", "content": "test-prompt-2"}], @@ -276,10 +276,10 @@ def test_run_with_parameters(self): @pytest.mark.unit def test_run_stream(self): - with patch("haystack.preview.components.generators.openai.chatgpt.openai.ChatCompletion") as chatgpt_patch: + with patch("haystack.preview.components.generators.openai.gpt35.openai.ChatCompletion") as gpt35_patch: mock_callback = Mock() mock_callback.side_effect = default_streaming_callback - chatgpt_patch.create.side_effect = mock_openai_stream_response + gpt35_patch.create.side_effect = mock_openai_stream_response component = GPT35Generator( api_key="test-api-key", system_prompt="test-system-prompt", streaming_callback=mock_callback ) @@ -296,8 +296,8 @@ def test_run_stream(self): } # Calls count: (10 tokens per prompt + 1 token for the role + 1 empty termination token) * 2 prompts assert mock_callback.call_count == 24 - assert chatgpt_patch.create.call_count == 2 - chatgpt_patch.create.assert_any_call( + assert gpt35_patch.create.call_count == 2 + gpt35_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[ @@ -306,7 +306,7 @@ def test_run_stream(self): ], stream=True, ) - chatgpt_patch.create.assert_any_call( + gpt35_patch.create.assert_any_call( model="gpt-3.5-turbo", api_key="test-api-key", messages=[ From 04ec229a9bf277dd593d75c9e4aa13032b08b2b8 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 15:16:14 +0200 Subject: [PATCH 33/37] fix e2e --- e2e/preview/components/test_gpt35_generator.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/e2e/preview/components/test_gpt35_generator.py b/e2e/preview/components/test_gpt35_generator.py index 9c49d2a8c8..2775961c5a 100644 --- a/e2e/preview/components/test_gpt35_generator.py +++ 
b/e2e/preview/components/test_gpt35_generator.py @@ -8,10 +8,8 @@ reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", ) def test_gpt35_generator_run(): - component = GPT35Generator(api_key=os.environ.get("OPENAI_API_KEY")) - results = component.run( - prompts=["What's the capital of France?", "What's the capital of Germany?"], model_parameters={"n": 1} - ) + component = GPT35Generator(api_key=os.environ.get("OPENAI_API_KEY"), n=1) + results = component.run(prompts=["What's the capital of France?", "What's the capital of Germany?"]) assert len(results["replies"]) == 2 assert len(results["replies"][0]) == 1 @@ -42,10 +40,8 @@ def __call__(self, chunk): return chunk callback = Callback() - component = GPT35Generator(os.environ.get("OPENAI_API_KEY"), streaming_callback=callback) - results = component.run( - prompts=["What's the capital of France?", "What's the capital of Germany?"], model_parameters={"n": 1} - ) + component = GPT35Generator(os.environ.get("OPENAI_API_KEY"), streaming_callback=callback, n=1) + results = component.run(prompts=["What's the capital of France?", "What's the capital of Germany?"]) assert len(results["replies"]) == 2 assert len(results["replies"][0]) == 1 From 8fb06ae7ea12f665789d8bfcaaf5cb48ea2fea0e Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 16:38:16 +0200 Subject: [PATCH 34/37] Update releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml Co-authored-by: Daria Fokina --- releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml b/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml index baaf359496..7de48fca7e 100644 --- a/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml +++ b/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml @@ -1,2 +1,2 @@ preview: - - Introduce `ChatGPTGenerator`, a class that can generate completions using OpenAI Chat models like ChatGPT and GPT4. + - Introduce `GPT35Generator`, a class that can generate completions using OpenAI Chat models like ChatGPT and GPT4. From 46385acaa1a5ad65bada37e6845a50ac55d969ce Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 17:28:30 +0200 Subject: [PATCH 35/37] add another test --- .../components/test_gpt35_generator.py | 15 +++++++++ .../components/generators/openai/gpt35.py | 33 ------------------- 2 files changed, 15 insertions(+), 33 deletions(-) diff --git a/e2e/preview/components/test_gpt35_generator.py b/e2e/preview/components/test_gpt35_generator.py index 2775961c5a..572cad935b 100644 --- a/e2e/preview/components/test_gpt35_generator.py +++ b/e2e/preview/components/test_gpt35_generator.py @@ -1,5 +1,6 @@ import os import pytest +import openai from haystack.preview.components.generators.openai.gpt35 import GPT35Generator @@ -26,6 +27,20 @@ def test_gpt35_generator_run(): assert "stop" == results["metadata"][1][0]["finish_reason"] +@pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", +) +def test_gpt35_generator_run_above_context_length(): + component = GPT35Generator(api_key=os.environ.get("OPENAI_API_KEY"), n=1) + with pytest.raises( + openai.InvalidRequestError, + match="This model's maximum context length is 4097 tokens. However, your messages resulted in 70008 tokens. 
" + "Please reduce the length of the messages.", + ): + component.run(prompts=["What's the capital of France? " * 10_000]) + + @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", diff --git a/haystack/preview/components/generators/openai/gpt35.py b/haystack/preview/components/generators/openai/gpt35.py index 402dccde77..17062b9dc4 100644 --- a/haystack/preview/components/generators/openai/gpt35.py +++ b/haystack/preview/components/generators/openai/gpt35.py @@ -137,39 +137,6 @@ def run(self, prompts: List[str]): Queries the LLM with the prompts to produce replies. :param prompts: The prompts to be sent to the generative model. - :param api_key: The OpenAI API key. - :param model_name: The name of the model to use. - :param system_prompt: An additional message to be sent to the LLM at the beginning of each conversation. - Typically, a conversation is formatted with a system message first, followed by alternating messages from - the 'user' (the "queries") and the 'assistant' (the "responses"). The system message helps set the behavior - of the assistant. For example, you can modify the personality of the assistant or provide specific - instructions about how it should behave throughout the conversation. - :param streaming_callback: A callback function that is called when a new token is received from the stream. - The callback function should accept two parameters: the token received from the stream and **kwargs. - The callback function should return the token to be sent to the stream. If the callback function is not - provided, the token is printed to stdout. - :param api_base_url: The OpenAI API Base url, defaults to `https://api.openai.com/v1`. - :param model_parameters: A dictionary of parameters to use for the model. See OpenAI - [documentation](https://platform.openai.com/docs/api-reference/chat) for more details. Some of the supported - parameters: - - `max_tokens`: The maximum number of tokens the output text can have. - - `temperature`: What sampling temperature to use. Higher values mean the model will take more risks. - Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model - considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens - comprising the top 10% probability mass are considered. - - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2, - it will generate two completions for each of the three prompts, ending up with 6 completions in total. - - `stop`: One or more sequences after which the LLM should stop generating tokens. - - `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean - the model will be less likely to repeat the same token in the text. - - `frequency_penalty`: What penalty to apply if a token has already been generated in the text. - Bigger values mean the model will be less likely to repeat the same token in the text. - - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the - values are the bias to add to that token. - - `openai_organization`: The OpenAI organization ID. - - See OpenAI documentation](https://platform.openai.com/docs/api-reference/chat) for more details. 
""" chats = [] for prompt in prompts: From 3ca3f73104eade47091bc97c6d060adb1078cade Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 17:48:11 +0200 Subject: [PATCH 36/37] test wrong model name --- e2e/preview/components/test_gpt35_generator.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/e2e/preview/components/test_gpt35_generator.py b/e2e/preview/components/test_gpt35_generator.py index 572cad935b..c70b8033f5 100644 --- a/e2e/preview/components/test_gpt35_generator.py +++ b/e2e/preview/components/test_gpt35_generator.py @@ -27,6 +27,16 @@ def test_gpt35_generator_run(): assert "stop" == results["metadata"][1][0]["finish_reason"] +@pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", +) +def test_gpt35_generator_run_wrong_model_name(): + component = GPT35Generator(model_name="something-obviously-wrong", api_key=os.environ.get("OPENAI_API_KEY"), n=1) + with pytest.raises(openai.InvalidRequestError, match="The model `something-obviously-wrong` does not exist"): + component.run(prompts=["What's the capital of France?"]) + + @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", From 101542430b3e9e1dfbd55a07a0299c56de1651ce Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Sep 2023 17:49:45 +0200 Subject: [PATCH 37/37] review feedback --- haystack/preview/components/generators/openai/gpt35.py | 1 - releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/haystack/preview/components/generators/openai/gpt35.py b/haystack/preview/components/generators/openai/gpt35.py index 17062b9dc4..e58b1ffa05 100644 --- a/haystack/preview/components/generators/openai/gpt35.py +++ b/haystack/preview/components/generators/openai/gpt35.py @@ -79,7 +79,6 @@ def __init__( Bigger values mean the model will be less likely to repeat the same token in the text. - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens and the values are the bias to add to that token. - - `openai_organization`: The OpenAI organization ID. """ self.api_key = api_key self.model_name = model_name diff --git a/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml b/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml index 7de48fca7e..13d9491a1e 100644 --- a/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml +++ b/releasenotes/notes/chatgpt-llm-generator-d043532654efe684.yaml @@ -1,2 +1,2 @@ preview: - - Introduce `GPT35Generator`, a class that can generate completions using OpenAI Chat models like ChatGPT and GPT4. + - Introduce `GPT35Generator`, a class that can generate completions using OpenAI Chat models like GPT3.5 and GPT4.