
Add openai/gpt-3.5-turbo-0301 model (#1401)
yifanmai authored Mar 28, 2023
1 parent a1c5e42 commit 2b1920e
Showing 6 changed files with 85 additions and 6 deletions.
2 changes: 1 addition & 1 deletion requirements-freeze.txt
@@ -84,7 +84,7 @@ nltk==3.7
nodeenv==1.7.0
numba==0.56.4
numpy==1.23.3
-openai==0.25.0
+openai==0.27.0
openpyxl==3.0.10
outcome==1.2.0
packaging==21.3
2 changes: 1 addition & 1 deletion requirements.txt
@@ -30,7 +30,7 @@ sqlitedict~=1.7.0
pymongo~=4.2.0
retrying~=1.3.3
websocket-client~=1.3.2 # For Anthropic
-openai~=0.25.0
+openai~=0.27.0
transformers~=4.26.1
tokenizers~=0.13.2
icetk~=0.0.4
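The version bump above is load-bearing: the ChatCompletion endpoint this commit calls was added to the openai Python package in 0.27.0. A minimal sketch of a direct call against that (pre-1.0, module-level) API, assuming a configured API key; the prompt is illustrative:

import openai

openai.api_key = "sk-..."  # placeholder; supply a real key

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0301",
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=16,
)
# Responses index like dicts; the completion text lives in the first choice's message.
print(response["choices"][0]["message"]["content"])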
6 changes: 6 additions & 0 deletions src/helm/benchmark/static/schema.yaml
@@ -407,6 +407,12 @@ models:
    description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
    creator_organization: OpenAI
    access: limited
+  - name: openai/gpt-3.5-turbo-0301
+    display_name: gpt-3.5-turbo-0301
+    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+    creator_organization: OpenAI
+    access: limited
+    release_date: 2023-03-01
  - name: openai/chat-gpt
    display_name: ChatGPT
    description: Sibling model to InstructGPT which interacts in a conversational way. See [OpenAI's announcement](https://openai.com/blog/chatgpt/). The size of the model is unknown.
1 change: 1 addition & 0 deletions src/helm/proxy/clients/auto_client.py
@@ -82,6 +82,7 @@ def _get_client(self, model: str) -> Client:
            client = OpenAIClient(
                api_key=self.credentials["openaiApiKey"],
                cache_config=cache_config,
+               tokenizer_client=self._get_tokenizer_client("huggingface"),
                chat_gpt_client=chat_gpt_client,
                org_id=org_id,
            )
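The tokenizer_client wired in above exists because the chat endpoint returns plain text with no token-level data, so the client must re-tokenize completions itself; the diff below assumes the GPT-2 tokenizer. A rough standalone equivalent of that step using Hugging Face's transformers directly (an assumption for illustration; HELM actually routes through its own tokenizer client):

from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
completion_text = "Hello! How can I help you today?"  # stand-in for a chat completion

# Recover token strings for a completion the API returned as plain text.
raw_tokens = tokenizer.tokenize(completion_text)
# Logprobs are unavailable from the chat API, so each synthesized token gets 0.
tokens = [{"text": t, "logprob": 0.0, "top_logprobs": {}} for t in raw_tokens]
print(raw_tokens)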
62 changes: 60 additions & 2 deletions src/helm/proxy/clients/openai_client.py
@@ -1,5 +1,5 @@
from dataclasses import replace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, cast

import openai

@@ -24,15 +24,20 @@ def __init__(
        self,
        api_key: str,
        cache_config: CacheConfig,
+       tokenizer_client: Client,
        chat_gpt_client: Optional[ChatGPTClient] = None,
        org_id: Optional[str] = None,
    ):
        self.org_id: Optional[str] = org_id
        self.api_key: str = api_key
        self.api_base: str = "https://api.openai.com/v1"
        self.cache = Cache(cache_config)
+       self.tokenizer_client: Client = tokenizer_client
        self.chat_gpt_client: Optional[ChatGPTClient] = chat_gpt_client

+   def _is_chat_model_engine(self, model_engine: str):
+       return model_engine.startswith("gpt-3.5")
+
    def make_request(self, request: Request) -> RequestResult:
        if request.model_engine == "chat-gpt":
            assert self.chat_gpt_client is not None
@@ -44,6 +49,28 @@ def make_request(self, request: Request) -> RequestResult:
                "input": request.prompt,
                "engine": request.model_engine,
            }
+       elif self._is_chat_model_engine(request.model_engine):
+           raw_request = {
+               "model": request.model_engine,
+               # For now, put the whole prompt in a single user message, and expect the response
+               # to be returned in a single assistant message.
+               # TODO: Support ChatML for creating multiple messages with different roles.
+               # See: https://github.com/openai/openai-python/blob/main/chatml.md
+               "messages": [{"role": "user", "content": request.prompt}],
+               "temperature": request.temperature,
+               "top_p": request.top_p,
+               "n": request.num_completions,
+               # Note: Setting stop to ["\n"] results in an error
+               # See: https://community.openai.com/t/stop-n-in-gpt-3-5-turbo-leads-to-500-error/87815/15
+               # TODO: Handle this in the adapter.
+               "stop": request.stop_sequences or None,  # API doesn't like empty list
+               # Note: Chat models may require adding an extra token to max_tokens
+               # for the internal special role token.
+               # TODO: Handle this in the adapter.
+               "max_tokens": request.max_tokens,
+               "presence_penalty": request.presence_penalty,
+               "frequency_penalty": request.frequency_penalty,
+           }
        else:
            raw_request = {
                "engine": request.model_engine,
@@ -74,6 +101,14 @@ def do_it():
                openai.api_base = self.api_base
                return openai.Embedding.create(**raw_request)

+       elif self._is_chat_model_engine(request.model_engine):
+
+           def do_it():
+               openai.organization = self.org_id
+               openai.api_key = self.api_key
+               openai.api_base = self.api_base
+               return openai.ChatCompletion.create(**raw_request)
+
        else:

            def do_it():
@@ -95,14 +130,37 @@ def do_it():
        # needs to be populated, and `embedding` should be an empty list and vice-versa.
        embedding: List[float] = []
        completions: List[Sequence] = []
+       tokens: List[Token]
        if request.embedding:
            # If the user is requesting an embedding instead of completion
            # then completions would be left as an empty list. The embedding needs to be set.
            embedding = response["data"][0]["embedding"]
+       elif self._is_chat_model_engine(request.model_engine):
+           for raw_completion in response["choices"]:
+               # The ChatGPT API doesn't support echo. If `echo_prompt` is true, combine the prompt and completion.
+               raw_completion_content = raw_completion["message"]["content"]
+               text: str = request.prompt + raw_completion_content if request.echo_prompt else raw_completion_content
+               # The ChatGPT API doesn't return us tokens or logprobs, so we tokenize ourselves.
+               tokenization_result: TokenizationRequestResult = self.tokenizer_client.tokenize(
+                   # We're assuming ChatGPT uses the GPT-2 tokenizer.
+                   TokenizationRequest(text, tokenizer="huggingface/gpt2")
+               )
+               # Log probs are not currently supported by the ChatGPT API, so set them to 0 for now.
+               tokens = [
+                   Token(text=cast(str, raw_token), logprob=0, top_logprobs={})
+                   for raw_token in tokenization_result.raw_tokens
+               ]
+               completion = Sequence(
+                   text=text,
+                   logprob=0,  # ChatGPT does not provide logprobs
+                   tokens=tokens,
+                   finish_reason={"reason": raw_completion["finish_reason"]},
+               )
+               completions.append(truncate_sequence(completion, request))  # Truncate the text by stop sequences
        else:
            for raw_completion in response["choices"]:
                sequence_logprob = 0
-               tokens: List[Token] = []
+               tokens = []

                raw_data = raw_completion["logprobs"]
                for text, logprob, top_logprobs in zip(
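Stripped of HELM's caching and request plumbing, the new control flow in openai_client.py routes gpt-3.5 engines to the chat endpoint and everything else to the classic completions endpoint. A hedged sketch of that dispatch; the complete helper and its defaults are illustrative, not HELM's API:

import openai

def is_chat_model_engine(model_engine: str) -> bool:
    # Mirrors OpenAIClient._is_chat_model_engine above.
    return model_engine.startswith("gpt-3.5")

def complete(model_engine: str, prompt: str, max_tokens: int = 32) -> str:
    if is_chat_model_engine(model_engine):
        # Chat engines take role-tagged messages; the whole prompt goes into
        # a single user message, matching the diff above.
        response = openai.ChatCompletion.create(
            model=model_engine,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        return response["choices"][0]["message"]["content"]
    # Classic engines take a flat prompt string.
    response = openai.Completion.create(
        engine=model_engine,
        prompt=prompt,
        max_tokens=max_tokens,
    )
    return response["choices"][0]["text"]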
18 changes: 16 additions & 2 deletions src/helm/proxy/models.py
@@ -15,7 +15,7 @@
CHATML_MODEL_TAG: str = "chatml"

# For OpenAI models with wider context windows
-WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"
+WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"  # 4000 tokens

# To fetch models that use these tokenizers
GPT2_TOKENIZER_TAG: str = "gpt2_tokenizer"
@@ -553,7 +553,21 @@ def engine(self) -> str:
        description="Code model that is a stronger, multilingual version of the Codex (12B) model in the paper.",
        tags=[CODE_MODEL_TAG, GPT2_TOKENIZER_TAG],
    ),
-   # ChatGPT - https://openai.com/blog/chatgpt
+   # ChatGPT: https://openai.com/blog/chatgpt
+   Model(
+       group="gpt3",
+       creator_organization="OpenAI",
+       name="openai/gpt-3.5-turbo-0301",
+       display_name="gpt-3.5-turbo-0301",
+       # https://platform.openai.com/docs/models/gpt-3-5
+       description="Sibling model of text-davinci-003 is optimized for chat but works well "
+       "for traditional completions tasks as well. Snapshot from 2023-03-01.",
+       # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
+       # sequence length is smaller at 4087 with one user input message and one assistant
+       # output message because ChatGPT uses special tokens for message roles and boundaries.
+       # We use a rounded-down sequence length of 4000 to account for these special tokens.
+       tags=[TEXT_MODEL_TAG, WIDER_CONTEXT_WINDOW_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
+   ),
    Model(
        group="gpt3",
        creator_organization="OpenAI",
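As a quick sanity check on the sequence-length comment above (the 9-token overhead is inferred from the two numbers in the comment, not documented by OpenAI):

claimed_sequence_length = 4096   # from OpenAI's model docs
observed_usable_length = 4087    # measured with one user + one assistant message
overhead = claimed_sequence_length - observed_usable_length
print(overhead)  # 9 tokens consumed by role/boundary special tokens
# HELM rounds down to 4000 (WIDER_CONTEXT_WINDOW_TAG) to leave headroom.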
