refactor: OpenAI token limits (#7326)
* Update OpenAI token limits

* Update test to be parametrized

* Add davinci and babbage

* Fix remaining tests

* Remove gpt2 as default tokenizer

* Update doc string

* Add another edge case

* Add more tests

* Indent blocks

* Remove unnecessary developer comments
sjrl authored and vblagoje committed Mar 25, 2024
1 parent e1ba832 commit 1bd3ac6
Showing 4 changed files with 67 additions and 106 deletions.
55 changes: 23 additions & 32 deletions haystack/utils/openai_utils.py
@@ -64,46 +64,37 @@ def _openai_text_completion_tokenization_details(model_name: str):
    :param model_name: Name of the OpenAI model.
    """
-    tokenizer_name = "gpt2"
-    max_tokens_limit = 4096  # It is the minimum max_tokens_limit value based on this ref: https://platform.openai.com/docs/models/overview
+    tokenizer_name = "cl100k_base"
+    # It is the minimum max_tokens_limit value based on this ref: https://platform.openai.com/docs/models/overview
+    max_tokens_limit = 4096
    try:
        model_tokenizer = tiktoken.encoding_name_for_model(model_name)
    except KeyError:
        model_tokenizer = None

    if model_tokenizer:
-        # Based on OpenAI models page, the following are the max_tokens_limit values for the corresponding models
-        ## Ref: https://platform.openai.com/docs/models/overview
-        if "davinci" in model_name:
+        tokenizer_name = model_tokenizer
+        if model_name == "davinci-002" or model_name == "babbage-002":
            max_tokens_limit = 16384
-            tokenizer_name = model_tokenizer
-        elif (
-            model_name.startswith("gpt-3.5-turbo-instruct")
-            or model_name.startswith("gpt-35-turbo-instruct")
-            or model_name.startswith("gpt-3.5-turbo-0613")
-            or model_name.startswith("gpt-35-turbo-0613")
-        ):
-            max_tokens_limit = 4096
-            tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-3.5-turbo") or model_name.startswith("gpt-35-turbo"):
-            max_tokens_limit = 16384
-            tokenizer_name = model_tokenizer
+
+        if model_name.startswith("gpt-3.5-turbo") or model_name.startswith("gpt-35-turbo"):
+            max_tokens_limit = 16385
+            # Handles edge-cases where the value is 4096
+            if (
+                model_name == "gpt-3.5-turbo-instruct"
+                or model_name == "gpt-3.5-turbo-0613"
+                or model_name == "gpt-35-turbo-instruct"
+                or model_name == "gpt-35-turbo-0613"
+            ):
+                max_tokens_limit = 4096
+
        # Ref: https://platform.openai.com/docs/models/gpt-4
-        elif model_name.startswith("gpt-4-32k"):
-            max_tokens_limit = 32768  # tokens
-            tokenizer_name = model_tokenizer
-        elif (
-            model_name.startswith("gpt-4-1106")
-            or model_name.startswith("gpt-4-turbo-preview")
-            or model_name.startswith("gpt-4-0125-preview")
-        ):
-            max_tokens_limit = 128000  # tokens
-            tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-4"):
-            max_tokens_limit = 8192  # tokens
-            tokenizer_name = model_tokenizer
-        else:
-            tokenizer_name = model_tokenizer
+        if model_name.startswith("gpt-4"):
+            max_tokens_limit = 128000
+            if model_name == "gpt-4" or model_name == "gpt-4-0613":
+                max_tokens_limit = 8192
+            if model_name == "gpt-4-32k" or model_name == "gpt-4-32k-0613":
+                max_tokens_limit = 32768

    return tokenizer_name, max_tokens_limit
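
For reference, a quick usage sketch of the updated helper (assuming haystack and tiktoken are installed); the expected values follow from the parametrized tests further down:

```python
from haystack.utils.openai_utils import _openai_text_completion_tokenization_details

# Known models resolve to their tiktoken encoding and documented context window.
tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(
    model_name="gpt-3.5-turbo"
)
print(tokenizer_name, max_tokens_limit)  # cl100k_base 16385

# Unrecognized names now fall back to cl100k_base (instead of gpt2)
# with the minimum limit of 4096.
print(_openai_text_completion_tokenization_details(model_name="not-recognized-name"))
# ('cl100k_base', 4096)
```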

2 changes: 1 addition & 1 deletion test/prompt/invocation_layer/test_chatgpt.py
@@ -53,7 +53,7 @@ def test_chatgpt_token_limit_warning_single_prompt(mock_openai_tokenizer, caplog
    with caplog.at_level(logging.WARNING):
        _ = invocation_layer._ensure_token_limit(prompt="This is a test for a mock openai tokenizer.")
    assert "The prompt has been truncated from" in caplog.text
-    assert "and answer length (16379 tokens) fit within the max token limit (16384 tokens)." in caplog.text
+    assert "and answer length (16379 tokens) fit within the max token limit (16385 tokens)." in caplog.text


@pytest.mark.unit
12 changes: 6 additions & 6 deletions test/prompt/invocation_layer/test_openai.py
@@ -41,21 +41,21 @@ def test_custom_api_base(mock_open_ai_request, load_openai_tokenizer):
@pytest.mark.unit
def test_openai_token_limit_warning(mock_openai_tokenizer, caplog):
    invocation_layer = OpenAIInvocationLayer(
-        model_name_or_path="davinci-002", api_key="fake_api_key", api_base="https://fake_api_base.com", max_length=16379
+        model_name_or_path="babbage-002", api_key="fake_api_key", api_base="https://fake_api_base.com", max_length=16385
    )
    with caplog.at_level(logging.WARNING):
        _ = invocation_layer._ensure_token_limit(prompt="This is a test for a mock openai tokenizer.")
    assert "The prompt has been truncated from" in caplog.text
-    assert "and answer length (16379 tokens) fit within the max token limit (16384 tokens)." in caplog.text
+    assert "and answer length (16385 tokens) fit within the max token limit (16384 tokens)." in caplog.text


@pytest.mark.unit
@pytest.mark.parametrize(
    "model_name,max_tokens_limit",
    [
        ("gpt-3.5-turbo-instruct", 4096),
-        ("gpt-3.5-turbo-0613", 4096),
-        ("gpt-3.5-turbo", 16384),
+        ("gpt-3.5-turbo", 16385),
+        ("gpt-3.5-turbo-16k", 16385),
        ("gpt-4-32k", 32768),
        ("gpt-4-1106", 128000),
        ("gpt-4-turbo-preview", 128000),
@@ -80,8 +80,8 @@ def test_openai_token_limit_warning_not_triggered(caplog, mock_openai_tokenizer,
"model_name,max_tokens_limit",
[
("gpt-3.5-turbo-instruct", 4096),
("gpt-3.5-turbo-0613", 4096),
("gpt-3.5-turbo", 16384),
("gpt-3.5-turbo", 16385),
("gpt-3.5-turbo-16k", 16385),
("gpt-4-32k", 32768),
("gpt-4-1106", 128000),
("gpt-4-turbo-preview", 128000),
104 changes: 37 additions & 67 deletions test/utils/test_openai_utils.py
@@ -14,73 +14,43 @@


@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_default():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="not-recognized-name")
-    assert tokenizer_name == "gpt2"
-    assert max_tokens_limit == 4096
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_3_5_turbo_instruct():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-instruct")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 4096
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt3_5_azure():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-35-turbo")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 16384
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt3_5():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 16384
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt3_5_1106():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-1106")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 16384
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 8192
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4_32k():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-32k")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 32768
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4_1106():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-1106")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 128000
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4_turbo_preview():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-turbo-preview")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 128000
-
-
-@pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_4_0125_preview():
-    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-0125-preview")
-    assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 128000
+@pytest.mark.parametrize(
+    "model_name,tok_name,max_tok_limit",
+    [
+        # Default
+        ("not-recognized-name", "cl100k_base", 4096),
+        # GPT-3.5
+        ("gpt-3.5-turbo-0125", "cl100k_base", 16385),
+        ("gpt-3.5-turbo-instruct", "cl100k_base", 4096),
+        ("gpt-3.5-turbo-0613", "cl100k_base", 4096),
+        ("gpt-3.5-turbo", "cl100k_base", 16385),
+        ("gpt-3.5-turbo-1106", "cl100k_base", 16385),
+        ("gpt-3.5-turbo-16k", "cl100k_base", 16385),
+        ("gpt-3.5-turbo-16k-0613", "cl100k_base", 16385),
+        # GPT 4
+        ("gpt-4-0125-preview", "cl100k_base", 128000),
+        ("gpt-4-turbo-preview", "cl100k_base", 128000),
+        ("gpt-4-1106-preview", "cl100k_base", 128000),
+        ("gpt-4-vision-preview", "cl100k_base", 128000),
+        ("gpt-4-1106-vision-preview", "cl100k_base", 128000),
+        ("gpt-4", "cl100k_base", 8192),
+        ("gpt-4-0613", "cl100k_base", 8192),
+        ("gpt-4-32k", "cl100k_base", 32768),
+        ("gpt-4-32k-0613", "cl100k_base", 32768),
+        ("gpt-4-1106", "cl100k_base", 128000),
+        # GPT-35 Azure
+        ("gpt-35-turbo-instruct", "cl100k_base", 4096),
+        ("gpt-35-turbo", "cl100k_base", 16385),
+        ("gpt-35-turbo-16k", "cl100k_base", 16385),
+        # davinci and babbage
+        ("davinci-002", "cl100k_base", 16384),
+        ("babbage-002", "cl100k_base", 16384),
+    ],
+)
+def test_openai_text_completion_tokenization(model_name, tok_name, max_tok_limit):
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name=model_name)
+    assert tokenizer_name == tok_name
+    assert max_tokens_limit == max_tok_limit
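
With the table-driven layout, covering another snapshot is a one-row change. A standalone sketch of such an extension (the row and test name are hypothetical, not part of this commit; the expected values follow the new resolution rules):

```python
import pytest

from haystack.utils.openai_utils import _openai_text_completion_tokenization_details

# A minimal standalone illustration of the table-driven pattern used above.
@pytest.mark.parametrize(
    "model_name,tok_name,max_tok_limit",
    [("gpt-35-turbo-0613", "cl100k_base", 4096)],  # hypothetical extra Azure row
)
def test_additional_case(model_name, tok_name, max_tok_limit):
    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name=model_name)
    assert (tokenizer_name, max_tokens_limit) == (tok_name, max_tok_limit)
```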


@pytest.mark.unit
