fix: Do not retry when context window has been exceeded (#2126)
* Add typed token limit exception

* Do not tenacity retry on PhoenixExceptions

* Do not retry when max context window exceeded - OpenAI

* Do not retry when max context window exceeded - Bedrock

* Do not retry when max context window exceeded - Anthropic

* Normalize exception names

* Catch more bedrock errors

* Improve error handling per PR feedback
anticorrelator authored Jan 29, 2024
1 parent 7be1ab8 commit ff6df1f
Showing 5 changed files with 79 additions and 27 deletions.
4 changes: 4 additions & 0 deletions src/phoenix/exceptions.py
@@ -1,2 +1,6 @@
 class PhoenixException(Exception):
     pass
+
+
+class PhoenixContextLimitExceeded(PhoenixException):
+    pass
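Because PhoenixContextLimitExceeded subclasses PhoenixException, one base-class rule in base.py covers it and any future typed errors. A minimal illustration (not part of the commit):

from phoenix.exceptions import PhoenixContextLimitExceeded, PhoenixException

try:
    raise PhoenixContextLimitExceeded("prompt is too long")
except PhoenixException as caught:  # base-class handler catches the subclass
    assert isinstance(caught, PhoenixContextLimitExceeded)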
21 changes: 17 additions & 4 deletions src/phoenix/experimental/evals/models/anthropic.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Dict, List, Optional

+from phoenix.exceptions import PhoenixContextLimitExceeded
 from phoenix.experimental.evals.models.base import BaseEvalModel
 from phoenix.experimental.evals.models.rate_limiters import RateLimiter
@@ -139,8 +140,14 @@ def _generate_with_retry(self, **kwargs: Any) -> Any:
         @self.retry
         @self._rate_limiter.limit
         def _completion_with_retry(**kwargs: Any) -> Any:
-            response = self.client.completions.create(**kwargs)
-            return response.completion
+            try:
+                response = self.client.completions.create(**kwargs)
+                return response.completion
+            except self._anthropic.BadRequestError as e:
+                exception_message = e.args[0]
+                if exception_message and "prompt is too long" in exception_message:
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e

         return _completion_with_retry(**kwargs)

@@ -160,8 +167,14 @@ async def _async_generate_with_retry(self, **kwargs: Any) -> Any:
         @self.retry
         @self._rate_limiter.alimit
         async def _async_completion_with_retry(**kwargs: Any) -> Any:
-            response = await self.async_client.completions.create(**kwargs)
-            return response.completion
+            try:
+                response = await self.async_client.completions.create(**kwargs)
+                return response.completion
+            except self._anthropic.BadRequestError as e:
+                exception_message = e.args[0]
+                if exception_message and "prompt is too long" in exception_message:
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e

         return await _async_completion_with_retry(**kwargs)

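The sync and async paths above repeat the same translate-and-reraise step. The pattern, lifted into a hypothetical standalone helper (not part of this commit) to make the logic explicit:

from phoenix.exceptions import PhoenixContextLimitExceeded


def reraise_if_context_limit(exc: Exception, marker: str) -> None:
    # If the SDK error's message signals a context overflow, convert it to
    # the typed Phoenix exception so the retry policy will not retry it.
    message = exc.args[0] if exc.args else None
    if message and marker in str(message):
        raise PhoenixContextLimitExceeded(message) from exc
    raise exc


# Inside the except blocks above, this would read:
#     reraise_if_context_limit(e, "prompt is too long")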
5 changes: 5 additions & 0 deletions src/phoenix/experimental/evals/models/base.py
@@ -4,6 +4,7 @@
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Callable, Generator, List, Optional, Sequence, Type

+from phoenix.exceptions import PhoenixException
 from phoenix.experimental.evals.models.rate_limiters import RateLimiter

 if TYPE_CHECKING:
@@ -15,6 +16,7 @@
     retry,
     retry_base,
     retry_if_exception_type,
+    retry_unless_exception_type,
     stop_after_attempt,
     wait_random_exponential,
 )
@@ -103,6 +105,9 @@ def log_retry(retry_state: RetryCallState) -> None:
     retry_instance: retry_base = retry_if_exception_type(error_types[0])
     for error in error_types[1:]:
         retry_instance = retry_instance | retry_if_exception_type(error)
+
+    internal_error_bypass: retry_base = retry_unless_exception_type(PhoenixException)
+    retry_instance = retry_instance & internal_error_bypass
     return retry(
         reraise=True,
         stop=stop_after_attempt(max_retries),
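The & above composes tenacity predicates: an error is retried only if it matches one of the provider's retryable error types and is not a PhoenixException. A self-contained sketch of the resulting policy, assuming a made-up TransientAPIError and attempt limits (illustrative, not the Phoenix source):

from tenacity import (
    retry,
    retry_if_exception_type,
    retry_unless_exception_type,
    stop_after_attempt,
    wait_random_exponential,
)

from phoenix.exceptions import PhoenixContextLimitExceeded, PhoenixException


class TransientAPIError(Exception):  # hypothetical retryable provider error
    pass


@retry(
    reraise=True,
    stop=stop_after_attempt(5),
    wait=wait_random_exponential(multiplier=1, max=10),
    # Retry transient provider errors, but never Phoenix's typed exceptions.
    retry=retry_if_exception_type(TransientAPIError)
    & retry_unless_exception_type(PhoenixException),
)
def call_model(prompt: str) -> str:
    if len(prompt) > 9_000:  # stand-in for a real context window check
        raise PhoenixContextLimitExceeded("maximum context length exceeded")
    return "ok"


# call_model("hi")          -> "ok" on the first attempt
# call_model("x" * 10_000)  -> raises PhoenixContextLimitExceeded immediately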
19 changes: 18 additions & 1 deletion src/phoenix/experimental/evals/models/bedrock.py
@@ -3,6 +3,7 @@
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Dict, List, Optional

+from phoenix.exceptions import PhoenixContextLimitExceeded
 from phoenix.experimental.evals.models.base import BaseEvalModel
 from phoenix.experimental.evals.models.rate_limiters import RateLimiter
@@ -142,7 +143,23 @@ def _generate_with_retry(self, **kwargs: Any) -> Any:
         @self.retry
         @self._rate_limiter.limit
         def _completion_with_retry(**kwargs: Any) -> Any:
-            return self.client.invoke_model(**kwargs)
+            try:
+                return self.client.invoke_model(**kwargs)
+            except Exception as e:
+                exception_message = e.args[0]
+                if not exception_message:
+                    raise e
+
+                if "Input is too long" in exception_message:
+                    # Error from Anthropic models
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                elif "expected maxLength" in exception_message:
+                    # Error from Titan models
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                elif "Prompt has too many tokens" in exception_message:
+                    # Error from AI21 models
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e

         return _completion_with_retry(**kwargs)

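Bedrock hosts several model families, each of which reports a context overflow with different wording, hence the three substring checks; the broad except Exception is presumably because boto3 raises dynamically generated, model-specific error classes. The checks could equally collapse into one membership test, as in this hypothetical refactor (not part of the commit):

_CONTEXT_LIMIT_MARKERS = (
    "Input is too long",           # Anthropic models on Bedrock
    "expected maxLength",          # Titan models
    "Prompt has too many tokens",  # AI21 models
)


def _is_context_limit_error(message: str) -> bool:
    return any(marker in message for marker in _CONTEXT_LIMIT_MARKERS)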
57 changes: 35 additions & 22 deletions src/phoenix/experimental/evals/models/openai.py
@@ -14,6 +14,7 @@
     get_origin,
 )

+from phoenix.exceptions import PhoenixContextLimitExceeded
 from phoenix.experimental.evals.models.base import BaseEvalModel
 from phoenix.experimental.evals.models.rate_limiters import RateLimiter
@@ -302,18 +303,24 @@ async def _async_generate_with_retry(self, **kwargs: Any) -> Any:
         @self.retry
         @self._rate_limiter.alimit
         async def _completion_with_retry(**kwargs: Any) -> Any:
-            if self._model_uses_legacy_completion_api:
-                if "prompt" not in kwargs:
-                    kwargs["prompt"] = "\n\n".join(
-                        (message.get("content") or "")
-                        for message in (kwargs.pop("messages", None) or ())
-                    )
-                # OpenAI 1.0.0 API responses are pydantic objects, not dicts
-                # We must dump the model to get the dict
-                res = await self._async_client.completions.create(**kwargs)
-            else:
-                res = await self._async_client.chat.completions.create(**kwargs)
-            return res.model_dump()
+            try:
+                if self._model_uses_legacy_completion_api:
+                    if "prompt" not in kwargs:
+                        kwargs["prompt"] = "\n\n".join(
+                            (message.get("content") or "")
+                            for message in (kwargs.pop("messages", None) or ())
+                        )
+                    # OpenAI 1.0.0 API responses are pydantic objects, not dicts
+                    # We must dump the model to get the dict
+                    res = await self._async_client.completions.create(**kwargs)
+                else:
+                    res = await self._async_client.chat.completions.create(**kwargs)
+                return res.model_dump()
+            except self._openai._exceptions.BadRequestError as e:
+                exception_message = e.args[0]
+                if exception_message and "maximum context length" in exception_message:
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e

         return await _completion_with_retry(**kwargs)

@@ -323,16 +330,22 @@ def _generate_with_retry(self, **kwargs: Any) -> Any:
         @self.retry
         @self._rate_limiter.limit
         def _completion_with_retry(**kwargs: Any) -> Any:
-            if self._model_uses_legacy_completion_api:
-                if "prompt" not in kwargs:
-                    kwargs["prompt"] = "\n\n".join(
-                        (message.get("content") or "")
-                        for message in (kwargs.pop("messages", None) or ())
-                    )
-                # OpenAI 1.0.0 API responses are pydantic objects, not dicts
-                # We must dump the model to get the dict
-                return self._client.completions.create(**kwargs).model_dump()
-            return self._client.chat.completions.create(**kwargs).model_dump()
+            try:
+                if self._model_uses_legacy_completion_api:
+                    if "prompt" not in kwargs:
+                        kwargs["prompt"] = "\n\n".join(
+                            (message.get("content") or "")
+                            for message in (kwargs.pop("messages", None) or ())
+                        )
+                    # OpenAI 1.0.0 API responses are pydantic objects, not dicts
+                    # We must dump the model to get the dict
+                    return self._client.completions.create(**kwargs).model_dump()
+                return self._client.chat.completions.create(**kwargs).model_dump()
+            except self._openai._exceptions.BadRequestError as e:
+                exception_message = e.args[0]
+                if exception_message and "maximum context length" in exception_message:
+                    raise PhoenixContextLimitExceeded(exception_message) from e
+                raise e

         return _completion_with_retry(**kwargs)

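Taken together, callers can now react to the typed error instead of watching tenacity exhaust retries on a prompt that can never fit. A hedged usage sketch (run_eval and the halving strategy are hypothetical, not Phoenix API):

from phoenix.exceptions import PhoenixContextLimitExceeded


def run_eval(model, prompt: str) -> str:
    try:
        return model(prompt)
    except PhoenixContextLimitExceeded:
        # The input exceeds the model's context window; resending it
        # unchanged can never succeed, so shrink it and try once more.
        return model(prompt[: len(prompt) // 2])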
