
Add openai/gpt-3.5-turbo-0301 model (#1401)
yifanmai authored Mar 28, 2023
1 parent a1c5e42 commit 2b1920e
Showing 6 changed files with 85 additions and 6 deletions.
2 changes: 1 addition & 1 deletion requirements-freeze.txt
@@ -84,7 +84,7 @@ nltk==3.7
nodeenv==1.7.0
numba==0.56.4
numpy==1.23.3
-openai==0.25.0
+openai==0.27.0
openpyxl==3.0.10
outcome==1.2.0
packaging==21.3
2 changes: 1 addition & 1 deletion requirements.txt
@@ -30,7 +30,7 @@ sqlitedict~=1.7.0
pymongo~=4.2.0
retrying~=1.3.3
websocket-client~=1.3.2 # For Anthropic
-openai~=0.25.0
+openai~=0.27.0
transformers~=4.26.1
tokenizers~=0.13.2
icetk~=0.0.4
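The version bump above is load-bearing: the ChatCompletion endpoint this commit calls was added to the openai Python package in 0.27.0. A minimal sketch of a direct call against that (pre-1.0, module-level) API, assuming a configured API key; the prompt is illustrative:

import openai

openai.api_key = "sk-..."  # placeholder; supply a real key

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0301",
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=16,
)
# Responses index like dicts; the completion text lives in the first choice's message.
print(response["choices"][0]["message"]["content"])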
6 changes: 6 additions & 0 deletions src/helm/benchmark/static/schema.yaml
@@ -407,6 +407,12 @@ models:
    description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
    creator_organization: OpenAI
    access: limited
+  - name: openai/gpt-3.5-turbo-0301
+    display_name: gpt-3.5-turbo-0301
+    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+    creator_organization: OpenAI
+    access: limited
+    release_date: 2023-03-01
  - name: openai/chat-gpt
    display_name: ChatGPT
    description: Sibling model to InstructGPT which interacts in a conversational way. See [OpenAI's announcement](https://openai.com/blog/chatgpt/). The size of the model is unknown.
1 change: 1 addition & 0 deletions src/helm/proxy/clients/auto_client.py
@@ -82,6 +82,7 @@ def _get_client(self, model: str) -> Client:
            client = OpenAIClient(
                api_key=self.credentials["openaiApiKey"],
                cache_config=cache_config,
+               tokenizer_client=self._get_tokenizer_client("huggingface"),
                chat_gpt_client=chat_gpt_client,
                org_id=org_id,
            )
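The tokenizer_client wired in above exists because the chat endpoint returns plain text with no token-level data, so the client must re-tokenize completions itself; the diff below assumes the GPT-2 tokenizer. A rough standalone equivalent of that step using Hugging Face's transformers directly (an assumption for illustration; HELM actually routes through its own tokenizer client):

from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
completion_text = "Hello! How can I help you today?"  # stand-in for a chat completion

# Recover token strings for a completion the API returned as plain text.
raw_tokens = tokenizer.tokenize(completion_text)
# Logprobs are unavailable from the chat API, so each synthesized token gets 0.
tokens = [{"text": t, "logprob": 0.0, "top_logprobs": {}} for t in raw_tokens]
print(raw_tokens)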
62 changes: 60 additions & 2 deletions src/helm/proxy/clients/openai_client.py
@@ -1,5 +1,5 @@
from dataclasses import replace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, cast

import openai

@@ -24,15 +24,20 @@ def __init__(
        self,
        api_key: str,
        cache_config: CacheConfig,
+       tokenizer_client: Client,
        chat_gpt_client: Optional[ChatGPTClient] = None,
        org_id: Optional[str] = None,
    ):
        self.org_id: Optional[str] = org_id
        self.api_key: str = api_key
        self.api_base: str = "https://api.openai.com/v1"
        self.cache = Cache(cache_config)
+       self.tokenizer_client: Client = tokenizer_client
        self.chat_gpt_client: Optional[ChatGPTClient] = chat_gpt_client

+   def _is_chat_model_engine(self, model_engine: str):
+       return model_engine.startswith("gpt-3.5")
+
    def make_request(self, request: Request) -> RequestResult:
        if request.model_engine == "chat-gpt":
            assert self.chat_gpt_client is not None
@@ -44,6 +49,28 @@ def make_request(self, request: Request) -> RequestResult:
                "input": request.prompt,
                "engine": request.model_engine,
            }
+       elif self._is_chat_model_engine(request.model_engine):
+           raw_request = {
+               "model": request.model_engine,
+               # For now, put the whole prompt in a single user message, and expect the response
+               # to be returned in a single assistant message.
+               # TODO: Support ChatML for creating multiple messages with different roles.
+               # See: https://github.com/openai/openai-python/blob/main/chatml.md
+               "messages": [{"role": "user", "content": request.prompt}],
+               "temperature": request.temperature,
+               "top_p": request.top_p,
+               "n": request.num_completions,
+               # Note: Setting stop to ["\n"] results in an error
+               # See: https://community.openai.com/t/stop-n-in-gpt-3-5-turbo-leads-to-500-error/87815/15
+               # TODO: Handle this in the adapter.
+               "stop": request.stop_sequences or None,  # API doesn't like empty list
+               # Note: Chat models may require adding an extra token to max_tokens
+               # for the internal special role token.
+               # TODO: Handle this in the adapter.
+               "max_tokens": request.max_tokens,
+               "presence_penalty": request.presence_penalty,
+               "frequency_penalty": request.frequency_penalty,
+           }
        else:
            raw_request = {
                "engine": request.model_engine,
@@ -74,6 +101,14 @@ def do_it():
                openai.api_base = self.api_base
                return openai.Embedding.create(**raw_request)

+       elif self._is_chat_model_engine(request.model_engine):
+
+           def do_it():
+               openai.organization = self.org_id
+               openai.api_key = self.api_key
+               openai.api_base = self.api_base
+               return openai.ChatCompletion.create(**raw_request)
+
        else:

            def do_it():
@@ -95,14 +130,37 @@ def do_it():
        # needs to be populated, and `embedding` should be an empty list and vice-versa.
        embedding: List[float] = []
        completions: List[Sequence] = []
+       tokens: List[Token]
        if request.embedding:
            # If the user is requesting an embedding instead of completion
            # then completions would be left as an empty list. The embedding needs to be set.
            embedding = response["data"][0]["embedding"]
+       elif self._is_chat_model_engine(request.model_engine):
+           for raw_completion in response["choices"]:
+               # The ChatGPT API doesn't support echo. If `echo_prompt` is true, combine the prompt and completion.
+               raw_completion_content = raw_completion["message"]["content"]
+               text: str = request.prompt + raw_completion_content if request.echo_prompt else raw_completion_content
+               # The ChatGPT API doesn't return us tokens or logprobs, so we tokenize ourselves.
+               tokenization_result: TokenizationRequestResult = self.tokenizer_client.tokenize(
+                   # We're assuming ChatGPT uses the GPT-2 tokenizer.
+                   TokenizationRequest(text, tokenizer="huggingface/gpt2")
+               )
+               # Log probs are not currently supported by the ChatGPT API, so set them to 0 for now.
+               tokens = [
+                   Token(text=cast(str, raw_token), logprob=0, top_logprobs={})
+                   for raw_token in tokenization_result.raw_tokens
+               ]
+               completion = Sequence(
+                   text=text,
+                   logprob=0,  # ChatGPT does not provide logprobs
+                   tokens=tokens,
+                   finish_reason={"reason": raw_completion["finish_reason"]},
+               )
+               completions.append(truncate_sequence(completion, request))  # Truncate the text by stop sequences
        else:
            for raw_completion in response["choices"]:
                sequence_logprob = 0
-               tokens: List[Token] = []
+               tokens = []

                raw_data = raw_completion["logprobs"]
                for text, logprob, top_logprobs in zip(
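Stripped of HELM's caching and request plumbing, the new control flow in openai_client.py routes gpt-3.5 engines to the chat endpoint and everything else to the classic completions endpoint. A hedged sketch of that dispatch; the complete helper and its defaults are illustrative, not HELM's API:

import openai

def is_chat_model_engine(model_engine: str) -> bool:
    # Mirrors OpenAIClient._is_chat_model_engine above.
    return model_engine.startswith("gpt-3.5")

def complete(model_engine: str, prompt: str, max_tokens: int = 32) -> str:
    if is_chat_model_engine(model_engine):
        # Chat engines take role-tagged messages; the whole prompt goes into
        # a single user message, matching the diff above.
        response = openai.ChatCompletion.create(
            model=model_engine,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        return response["choices"][0]["message"]["content"]
    # Classic engines take a flat prompt string.
    response = openai.Completion.create(
        engine=model_engine,
        prompt=prompt,
        max_tokens=max_tokens,
    )
    return response["choices"][0]["text"]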
18 changes: 16 additions & 2 deletions src/helm/proxy/models.py
@@ -15,7 +15,7 @@
CHATML_MODEL_TAG: str = "chatml"

# For OpenAI models with wider context windows
-WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"
+WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"  # 4000 tokens

# To fetch models that use these tokenizers
GPT2_TOKENIZER_TAG: str = "gpt2_tokenizer"
@@ -553,7 +553,21 @@ def engine(self) -> str:
        description="Code model that is a stronger, multilingual version of the Codex (12B) model in the paper.",
        tags=[CODE_MODEL_TAG, GPT2_TOKENIZER_TAG],
    ),
-   # ChatGPT - https://openai.com/blog/chatgpt
+   # ChatGPT: https://openai.com/blog/chatgpt
+   Model(
+       group="gpt3",
+       creator_organization="OpenAI",
+       name="openai/gpt-3.5-turbo-0301",
+       display_name="gpt-3.5-turbo-0301",
+       # https://platform.openai.com/docs/models/gpt-3-5
+       description="Sibling model of text-davinci-003 is optimized for chat but works well "
+       "for traditional completions tasks as well. Snapshot from 2023-03-01.",
+       # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
+       # sequence length is smaller at 4087 with one user input message and one assistant
+       # output message because ChatGPT uses special tokens for message roles and boundaries.
+       # We use a rounded-down sequence length of 4000 to account for these special tokens.
+       tags=[TEXT_MODEL_TAG, WIDER_CONTEXT_WINDOW_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
+   ),
    Model(
        group="gpt3",
        creator_organization="OpenAI",
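As a quick sanity check on the sequence-length comment above (the 9-token overhead is inferred from the two numbers in the comment, not documented by OpenAI):

claimed_sequence_length = 4096   # from OpenAI's model docs
observed_usable_length = 4087    # measured with one user + one assistant message
overhead = claimed_sequence_length - observed_usable_length
print(overhead)  # 9 tokens consumed by role/boundary special tokens
# HELM rounds down to 4000 (WIDER_CONTEXT_WINDOW_TAG) to leave headroom.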
