refactor: OpenAI token limits (#7326)
* Update OpenAI token limits

* Update test to be parametrized

* Add davinci and babbage

* Fix remaining tests

* Remove gpt2 as default tokenizer

* Update doc string

* Add another edge case

* Add more tests

* Indent blocks

* Remove unnecessary developer comments
sjrl authored and vblagoje committed Mar 25, 2024
1 parent e1ba832 commit 1bd3ac6
Showing 4 changed files with 67 additions and 106 deletions.
55 changes: 23 additions & 32 deletions haystack/utils/openai_utils.py
@@ -64,46 +64,37 @@ def _openai_text_completion_tokenization_details(model_name: str):
    :param model_name: Name of the OpenAI model.
    """
-    tokenizer_name = "gpt2"
-    max_tokens_limit = 4096  # It is the minimum max_tokens_limit value based on this ref: https://platform.openai.com/docs/models/overview
+    tokenizer_name = "cl100k_base"
+    # It is the minimum max_tokens_limit value based on this ref: https://platform.openai.com/docs/models/overview
+    max_tokens_limit = 4096
    try:
        model_tokenizer = tiktoken.encoding_name_for_model(model_name)
    except KeyError:
        model_tokenizer = None

    if model_tokenizer:
-        # Based on OpenAI models page, the following are the max_tokens_limit values for the corresponding models
-        ## Ref: https://platform.openai.com/docs/models/overview
-        if "davinci" in model_name:
+        tokenizer_name = model_tokenizer
+        if model_name == "davinci-002" or model_name == "babbage-002":
            max_tokens_limit = 16384
-            tokenizer_name = model_tokenizer
-        elif (
-            model_name.startswith("gpt-3.5-turbo-instruct")
-            or model_name.startswith("gpt-35-turbo-instruct")
-            or model_name.startswith("gpt-3.5-turbo-0613")
-            or model_name.startswith("gpt-35-turbo-0613")
-        ):
-            max_tokens_limit = 4096
-            tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-3.5-turbo") or model_name.startswith("gpt-35-turbo"):
-            max_tokens_limit = 16384
-            tokenizer_name = model_tokenizer
+
+        if model_name.startswith("gpt-3.5-turbo") or model_name.startswith("gpt-35-turbo"):
+            max_tokens_limit = 16385
+            # Handles edge-cases where the value is 4096
+            if (
+                model_name == "gpt-3.5-turbo-instruct"
+                or model_name == "gpt-3.5-turbo-0613"
+                or model_name == "gpt-35-turbo-instruct"
+                or model_name == "gpt-35-turbo-0613"
+            ):
+                max_tokens_limit = 4096
+
        # Ref: https://platform.openai.com/docs/models/gpt-4
-        elif model_name.startswith("gpt-4-32k"):
-            max_tokens_limit = 32768  # tokens
-            tokenizer_name = model_tokenizer
-        elif (
-            model_name.startswith("gpt-4-1106")
-            or model_name.startswith("gpt-4-turbo-preview")
-            or model_name.startswith("gpt-4-0125-preview")
-        ):
-            max_tokens_limit = 128000  # tokens
-            tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-4"):
-            max_tokens_limit = 8192  # tokens
-            tokenizer_name = model_tokenizer
-        else:
-            tokenizer_name = model_tokenizer
+        if model_name.startswith("gpt-4"):
+            max_tokens_limit = 128000
+            if model_name == "gpt-4" or model_name == "gpt-4-0613":
+                max_tokens_limit = 8192
+            if model_name == "gpt-4-32k" or model_name == "gpt-4-32k-0613":
+                max_tokens_limit = 32768

    return tokenizer_name, max_tokens_limit
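
For reference, a quick usage sketch of the updated helper (assuming haystack and tiktoken are installed); the expected values follow from the parametrized tests further down:

```python
from haystack.utils.openai_utils import _openai_text_completion_tokenization_details

# Known models resolve to their tiktoken encoding and documented context window.
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(
    model_name="gpt-3.5-turbo"
)
print(tokenizer_name, max_tokens_limit)  # cl100k_base 16385

# Unrecognized names now fall back to cl100k_base (instead of gpt2)
# with the minimum limit of 4096.
print(_openai_text_completion_tokenization_details(model_name="not-recognized-name"))
# ('cl100k_base', 4096)
```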

2 changes: 1 addition & 1 deletion test/prompt/invocation_layer/test_chatgpt.py
@@ -53,7 +53,7 @@ def test_chatgpt_token_limit_warning_single_prompt(mock_openai_tokenizer, caplog
    with caplog.at_level(logging.WARNING):
        _ = invocation_layer._ensure_token_limit(prompt="This is a test for a mock openai tokenizer.")
    assert "The prompt has been truncated from" in caplog.text
-    assert "and answer length (16379 tokens) fit within the max token limit (16384 tokens)." in caplog.text
+    assert "and answer length (16379 tokens) fit within the max token limit (16385 tokens)." in caplog.text


@pytest.mark.unit
12 changes: 6 additions & 6 deletions test/prompt/invocation_layer/test_openai.py
@@ -41,21 +41,21 @@ def test_custom_api_base(mock_open_ai_request, load_openai_tokenizer):
@pytest.mark.unit
def test_openai_token_limit_warning(mock_openai_tokenizer, caplog):
    invocation_layer = OpenAIInvocationLayer(
-        model_name_or_path="davinci-002", api_key="fake_api_key", api_base="https://fake_api_base.com", max_length=16379
+        model_name_or_path="babbage-002", api_key="fake_api_key", api_base="https://fake_api_base.com", max_length=16385
    )
    with caplog.at_level(logging.WARNING):
        _ = invocation_layer._ensure_token_limit(prompt="This is a test for a mock openai tokenizer.")
    assert "The prompt has been truncated from" in caplog.text
-    assert "and answer length (16379 tokens) fit within the max token limit (16384 tokens)." in caplog.text
+    assert "and answer length (16385 tokens) fit within the max token limit (16384 tokens)." in caplog.text


@pytest.mark.unit
@pytest.mark.parametrize(
    "model_name,max_tokens_limit",
    [
        ("gpt-3.5-turbo-instruct", 4096),
-        ("gpt-3.5-turbo-0613", 4096),
-        ("gpt-3.5-turbo", 16384),
+        ("gpt-3.5-turbo", 16385),
+        ("gpt-3.5-turbo-16k", 16385),
        ("gpt-4-32k", 32768),
        ("gpt-4-1106", 128000),
        ("gpt-4-turbo-preview", 128000),
@@ -80,8 +80,8 @@ def test_openai_token_limit_warning_not_triggered(caplog, mock_openai_tokenizer,
"model_name,max_tokens_limit",
[
("gpt-3.5-turbo-instruct", 4096),
("gpt-3.5-turbo-0613", 4096),
("gpt-3.5-turbo", 16384),
("gpt-3.5-turbo", 16385),
("gpt-3.5-turbo-16k", 16385),
("gpt-4-32k", 32768),
("gpt-4-1106", 128000),
("gpt-4-turbo-preview", 128000),
104 changes: 37 additions & 67 deletions test/utils/test_openai_utils.py
@@ -14,73 +14,43 @@


@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_default():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="not-recognized-name")
-    assert tokenizer_name == "gpt2"
-    assert max_tokens_limit == 4096
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_3_5_turbo_instruct():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-instruct")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 4096
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt3_5_azure():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-35-turbo")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 16384
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt3_5():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 16384
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt3_5_1106():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-1106")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 16384
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 8192
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4_32k():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-32k")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 32768
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4_1106():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-1106")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 128000
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4_turbo_preview():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-turbo-preview")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 128000
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4_0125_preview():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-0125-preview")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 128000
+@pytest.mark.parametrize(
+    "model_name,tok_name,max_tok_limit",
+    [
+        # Default
+        ("not-recognized-name", "cl100k_base", 4096),
+        # GPT-3.5
+        ("gpt-3.5-turbo-0125", "cl100k_base", 16385),
+        ("gpt-3.5-turbo-instruct", "cl100k_base", 4096),
+        ("gpt-3.5-turbo-0613", "cl100k_base", 4096),
+        ("gpt-3.5-turbo", "cl100k_base", 16385),
+        ("gpt-3.5-turbo-1106", "cl100k_base", 16385),
+        ("gpt-3.5-turbo-16k", "cl100k_base", 16385),
+        ("gpt-3.5-turbo-16k-0613", "cl100k_base", 16385),
+        # GPT 4
+        ("gpt-4-0125-preview", "cl100k_base", 128000),
+        ("gpt-4-turbo-preview", "cl100k_base", 128000),
+        ("gpt-4-1106-preview", "cl100k_base", 128000),
+        ("gpt-4-vision-preview", "cl100k_base", 128000),
+        ("gpt-4-1106-vision-preview", "cl100k_base", 128000),
+        ("gpt-4", "cl100k_base", 8192),
+        ("gpt-4-0613", "cl100k_base", 8192),
+        ("gpt-4-32k", "cl100k_base", 32768),
+        ("gpt-4-32k-0613", "cl100k_base", 32768),
+        ("gpt-4-1106", "cl100k_base", 128000),
+        # GPT-35 Azure
+        ("gpt-35-turbo-instruct", "cl100k_base", 4096),
+        ("gpt-35-turbo", "cl100k_base", 16385),
+        ("gpt-35-turbo-16k", "cl100k_base", 16385),
+        # davinci and babbage
+        ("davinci-002", "cl100k_base", 16384),
+        ("babbage-002", "cl100k_base", 16384),
+    ],
+)
+def test_openai_text_completion_tokenization(model_name, tok_name, max_tok_limit):
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name=model_name)
+    assert tokenizer_name == tok_name
+    assert max_tokens_limit == max_tok_limit
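
With the table-driven layout, covering another snapshot is a one-row change. A standalone sketch of such an extension (the row and test name are hypothetical, not part of this commit; the expected values follow the new resolution rules):

```python
import pytest

from haystack.utils.openai_utils import _openai_text_completion_tokenization_details

# A minimal standalone illustration of the table-driven pattern used above.
@pytest.mark.parametrize(
    "model_name,tok_name,max_tok_limit",
    [("gpt-35-turbo-0613", "cl100k_base", 4096)],  # hypothetical extra Azure row
)
def test_additional_case(model_name, tok_name, max_tok_limit):
    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name=model_name)
    assert (tokenizer_name, max_tokens_limit) == (tok_name, max_tok_limit)
```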


@pytest.mark.unit
