Add openai/gpt-3.5-turbo-0301 model #1401

Merged: 2 commits, Mar 28, 2023
requirements-freeze.txt (2 changes: 1 addition & 1 deletion)
@@ -84,7 +84,7 @@ nltk==3.7
 nodeenv==1.7.0
 numba==0.56.4
 numpy==1.23.3
-openai==0.25.0
+openai==0.27.0
 openpyxl==3.0.10
 outcome==1.2.0
 packaging==21.3
requirements.txt (2 changes: 1 addition & 1 deletion)
@@ -30,7 +30,7 @@ sqlitedict~=1.7.0
 pymongo~=4.2.0
 retrying~=1.3.3
 websocket-client~=1.3.2 # For Anthropic
-openai~=0.25.0
+openai~=0.27.0
 transformers~=4.26.1
 tokenizers~=0.13.2
 icetk~=0.0.4
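A note on the version bump: openai 0.25.0 predates chat support, while the 0.27-era SDK ships the ChatCompletion endpoint that the client changes below rely on. A minimal sketch of that endpoint (the API key and prompt are placeholders):

```python
import openai

openai.api_key = "sk-..."  # placeholder; supply a real key

# The chat endpoint introduced in the 0.27-era SDK, as exercised by this PR.
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0301",
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=16,
)
print(response["choices"][0]["message"]["content"])
```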
src/helm/benchmark/static/schema.yaml (6 changes: 6 additions & 0 deletions)
@@ -407,6 +407,12 @@ models:
     description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
     creator_organization: OpenAI
     access: limited
+  - name: openai/gpt-3.5-turbo-0301
+    display_name: gpt-3.5-turbo-0301
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+    creator_organization: OpenAI
+    access: limited
+    release_date: 2023-03-01
   - name: openai/chat-gpt
     display_name: ChatGPT
     description: Sibling model to InstructGPT which interacts in a conversational way. See [OpenAI's announcement](https://openai.com/blog/chatgpt/). The size of the model is unknown.
src/helm/proxy/clients/auto_client.py (1 change: 1 addition & 0 deletions)
@@ -82,6 +82,7 @@ def _get_client(self, model: str) -> Client:
             client = OpenAIClient(
                 api_key=self.credentials["openaiApiKey"],
                 cache_config=cache_config,
+                tokenizer_client=self._get_tokenizer_client("huggingface"),
                 chat_gpt_client=chat_gpt_client,
Member:
Do you mind removing chat_gpt_client and deleting ChatGPTClient? I don't think we need it now.

Collaborator (author):

OK, will do this in a new PR.

Collaborator (author):

Filed #1447 to track removing ChatGPTClient.

                 org_id=org_id,
             )
src/helm/proxy/clients/openai_client.py (62 changes: 60 additions & 2 deletions)
@@ -1,5 +1,5 @@
 from dataclasses import replace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, cast
 
 import openai
 
@@ -24,15 +24,20 @@ def __init__(
         self,
         api_key: str,
         cache_config: CacheConfig,
+        tokenizer_client: Client,
         chat_gpt_client: Optional[ChatGPTClient] = None,
         org_id: Optional[str] = None,
     ):
         self.org_id: Optional[str] = org_id
         self.api_key: str = api_key
         self.api_base: str = "https://api.openai.com/v1"
         self.cache = Cache(cache_config)
+        self.tokenizer_client: Client = tokenizer_client
         self.chat_gpt_client: Optional[ChatGPTClient] = chat_gpt_client
 
+    def _is_chat_model_engine(self, model_engine: str):
+        return model_engine.startswith("gpt-3.5")
+
     def make_request(self, request: Request) -> RequestResult:
         if request.model_engine == "chat-gpt":
             assert self.chat_gpt_client is not None
@@ -44,6 +49,28 @@ def make_request(self, request: Request) -> RequestResult:
                 "input": request.prompt,
                 "engine": request.model_engine,
             }
+        elif self._is_chat_model_engine(request.model_engine):
+            raw_request = {
+                "model": request.model_engine,
+                # For now, put the whole prompt in a single user message, and expect the response
+                # to be returned in a single assistant message.
+                # TODO: Support ChatML for creating multiple messages with different roles.
+                # See: https://github.com/openai/openai-python/blob/main/chatml.md
+                "messages": [{"role": "user", "content": request.prompt}],
+                "temperature": request.temperature,
+                "top_p": request.top_p,
+                "n": request.num_completions,
+                # Note: Setting stop to ["\n"] results in an error
+                # See: https://community.openai.com/t/stop-n-in-gpt-3-5-turbo-leads-to-500-error/87815/15
+                # TODO: Handle this in the adapter.
+                "stop": request.stop_sequences or None,  # API doesn't like an empty list
+                # Note: Chat models may require adding an extra token to max_tokens
+                # for the internal special role token.
+                # TODO: Handle this in the adapter.
+                "max_tokens": request.max_tokens,
+                "presence_penalty": request.presence_penalty,
+                "frequency_penalty": request.frequency_penalty,
+            }
         else:
             raw_request = {
                 "engine": request.model_engine,
@@ -74,6 +101,14 @@ def do_it():
                 openai.api_base = self.api_base
                 return openai.Embedding.create(**raw_request)
 
+        elif self._is_chat_model_engine(request.model_engine):
+
+            def do_it():
+                openai.organization = self.org_id
+                openai.api_key = self.api_key
+                openai.api_base = self.api_base
+                return openai.ChatCompletion.create(**raw_request)
+
         else:
 
             def do_it():
@@ -95,14 +130,37 @@ def do_it():
         # needs to be populated, and `embedding` should be an empty list and vice-versa.
         embedding: List[float] = []
         completions: List[Sequence] = []
+        tokens: List[Token]
         if request.embedding:
             # If the user is requesting an embedding instead of completion
             # then completions would be left as an empty list. The embedding needs to be set.
             embedding = response["data"][0]["embedding"]
+        elif self._is_chat_model_engine(request.model_engine):
+            for raw_completion in response["choices"]:
+                # The ChatGPT API doesn't support echo. If `echo_prompt` is true, combine the prompt and completion.
+                raw_completion_content = raw_completion["message"]["content"]
+                text: str = request.prompt + raw_completion_content if request.echo_prompt else raw_completion_content
+                # The ChatGPT API doesn't return tokens or logprobs, so we tokenize ourselves.
+                tokenization_result: TokenizationRequestResult = self.tokenizer_client.tokenize(
+                    # We're assuming ChatGPT uses the GPT-2 tokenizer.
+                    TokenizationRequest(text, tokenizer="huggingface/gpt2")
+                )
+                # Log probs are currently not supported by the ChatGPT API, so set them to 0 for now.
+                tokens = [
+                    Token(text=cast(str, raw_token), logprob=0, top_logprobs={})
+                    for raw_token in tokenization_result.raw_tokens
+                ]
+                completion = Sequence(
+                    text=text,
+                    logprob=0,  # ChatGPT does not provide logprobs
+                    tokens=tokens,
+                    finish_reason={"reason": raw_completion["finish_reason"]},
+                )
+                completions.append(truncate_sequence(completion, request))  # Truncate the text by stop sequences
         else:
             for raw_completion in response["choices"]:
                 sequence_logprob = 0
-                tokens: List[Token] = []
+                tokens = []
 
                 raw_data = raw_completion["logprobs"]
                 for text, logprob, top_logprobs in zip(
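Since the chat endpoint returns neither tokens nor logprobs, the branch above tokenizes the completion itself through the HuggingFace tokenizer client. A standalone sketch of the same fallback using the transformers package pinned in requirements.txt (the helper name is ours):

```python
from transformers import GPT2TokenizerFast

# Assumption carried over from the code above: ChatGPT's tokenization is
# approximated with the GPT-2 tokenizer.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

def fake_tokens(completion_text: str) -> list:
    # Mirror the fallback: derive token strings ourselves and attach
    # placeholder logprobs of 0, since the API returns neither.
    return [
        {"text": tok, "logprob": 0, "top_logprobs": {}}
        for tok in tokenizer.tokenize(completion_text)
    ]

print(fake_tokens("Hello there"))  # [{'text': 'Hello', ...}, {'text': 'Ġthere', ...}]
```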
src/helm/proxy/models.py (18 changes: 16 additions & 2 deletions)
@@ -15,7 +15,7 @@
 CHATML_MODEL_TAG: str = "chatml"
 
 # For OpenAI models with wider context windows
-WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"
+WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"  # 4000 tokens
 
 # To fetch models that use these tokenizers
 GPT2_TOKENIZER_TAG: str = "gpt2_tokenizer"
@@ -553,7 +553,21 @@ def engine(self) -> str:
         description="Code model that is a stronger, multilingual version of the Codex (12B) model in the paper.",
         tags=[CODE_MODEL_TAG, GPT2_TOKENIZER_TAG],
     ),
-    # ChatGPT - https://openai.com/blog/chatgpt
+    # ChatGPT: https://openai.com/blog/chatgpt
+    Model(
+        group="gpt3",
+        creator_organization="OpenAI",
+        name="openai/gpt-3.5-turbo-0301",
+        display_name="gpt-3.5-turbo-0301",
+        # https://platform.openai.com/docs/models/gpt-3-5
+        description="Sibling model of text-davinci-003 that is optimized for chat but works well "
+        "for traditional completions tasks as well. Snapshot from 2023-03-01.",
+        # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
+        # sequence length is smaller at 4087 with one user input message and one assistant
+        # output message, because ChatGPT uses special tokens for message roles and boundaries.
+        # We use a rounded-down sequence length of 4000 to account for these special tokens.
+        tags=[TEXT_MODEL_TAG, WIDER_CONTEXT_WINDOW_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
+    ),
     Model(
         group="gpt3",
         creator_organization="OpenAI",
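The 4000-token figure in the comment above deliberately undershoots the observed 4087 to leave headroom for role and boundary special tokens. A sketch of the budgeting arithmetic it implies (the helper is ours; constants are taken from the comment):

```python
CLAIMED_SEQUENCE_LENGTH = 4096   # per OpenAI's model docs
EMPIRICAL_USABLE_LENGTH = 4087   # observed with one user and one assistant message
ROUNDED_SEQUENCE_LENGTH = 4000   # conservative value behind WIDER_CONTEXT_WINDOW_TAG

def max_completion_tokens(num_prompt_tokens: int) -> int:
    # Tokens left for the completion under the rounded-down window.
    return max(0, ROUNDED_SEQUENCE_LENGTH - num_prompt_tokens)

assert max_completion_tokens(3900) == 100
```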