response_format support #12

Open
wants to merge 16 commits into main
27 changes: 21 additions & 6 deletions unify/clients.py
@@ -1,5 +1,4 @@
from typing import AsyncGenerator, Dict, Generator, List, Optional, Union

import openai
import requests
from unify.exceptions import BadRequestError, UnifyError, status_error_map
@@ -130,6 +129,7 @@ def generate( # noqa: WPS234, WPS211
temperature: Optional[float] = 1.0,
stop: Optional[List[str]] = None,
stream: bool = False,
response_format: Optional[dict] = {"type": "text"},
) -> Union[Generator[str, None, None], str]: # noqa: DAR101, DAR201, DAR401
"""Generate content using the Unify API.

@@ -157,6 +157,9 @@ def generate( # noqa: WPS234, WPS211
If False, generates content as a single response.
Defaults to False.

response_format (Optional[dict]): The response format, used with OpenAI models.
Sent in the request body as response_format={"type": format}, where format is
either "json_object" or "text". Defaults to {"type": "text"}. For JSON output,
the prompt must also explicitly ask for JSON.

Returns:
Union[Generator[str, None, None], str]: If stream is True,
returns a generator yielding chunks of content.
@@ -179,11 +182,11 @@ def generate( # noqa: WPS234, WPS211
return self._generate_stream(contents, self._endpoint,
max_tokens=max_tokens,
temperature=temperature,
stop=stop)
stop=stop, response_format=response_format)
return self._generate_non_stream(contents, self._endpoint,
max_tokens=max_tokens,
temperature=temperature,
stop=stop)
stop=stop, response_format=response_format)

def get_credit_balance(self) -> float:
# noqa: DAR201, DAR401
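
A minimal caller-side sketch of the new parameter: the `Unify` client class name, the endpoint string, and the API key handling are assumptions for illustration; only `response_format` itself comes from this diff.

# Hedged usage sketch; constructor arguments are assumed, response_format is from this PR.
from unify import Unify

client = Unify(endpoint="gpt-4o@openai", api_key="UNIFY_KEY")  # assumed constructor
reply = client.generate(
    user_prompt="Return a JSON object with keys 'city' and 'country' for Paris.",
    response_format={"type": "json_object"},  # new parameter; the prompt must also ask for JSON
)
print(reply)
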
@@ -218,6 +221,7 @@ def _generate_stream(
max_tokens: Optional[int] = 1024,
temperature: Optional[float] = 1.0,
stop: Optional[List[str]] = None,
response_format: Optional[dict] = {"type": "text"},
) -> Generator[str, None, None]:
try:
chat_completion = self.client.chat.completions.create(
@@ -227,6 +231,7 @@ def _generate_stream(
temperature=temperature,
stop=stop,
stream=True,
response_format=response_format,
)
for chunk in chat_completion:
content = chunk.choices[0].delta.content # type: ignore[union-attr]
@@ -243,6 +248,7 @@ def _generate_non_stream(
max_tokens: Optional[int] = 1024,
temperature: Optional[float] = 1.0,
stop: Optional[List[str]] = None,
response_format: Optional[dict] = {"type": "text"},
) -> str:
try:
chat_completion = self.client.chat.completions.create(
@@ -252,7 +258,7 @@ def _generate_non_stream(
temperature=temperature,
stop=stop,
stream=False,
)
response_format=response_format)
self.set_provider(
chat_completion.model.split( # type: ignore[union-attr]
"@",
@@ -413,6 +419,7 @@ async def generate( # noqa: WPS234, WPS211
temperature: Optional[float] = 1.0,
stop: Optional[List[str]] = None,
stream: bool = False,
response_format: Optional[dict] = {"type": "text"},
) -> Union[AsyncGenerator[str, None], str]: # noqa: DAR101, DAR201, DAR401
"""Generate content asynchronously using the Unify API.

@@ -440,6 +447,10 @@ async def generate( # noqa: WPS234, WPS211
If False, generates content as a single response.
Defaults to False.

response_format (Optional[dict]): The response format, used with OpenAI models.
Sent in the request body as response_format={"type": format}, where format is
either "json_object" or "text". Defaults to {"type": "text"}. For JSON output,
the prompt must also explicitly ask for JSON.

Returns:
Union[AsyncGenerator[str, None], List[str]]: If stream is True,
returns an asynchronous generator yielding chunks of content.
@@ -460,8 +471,8 @@ async def generate( # noqa: WPS234, WPS211
raise UnifyError("You must provide either the user_prompt or messages!")

if stream:
return self._generate_stream(contents, self._endpoint, max_tokens=max_tokens, stop=stop, temperature=temperature)
return await self._generate_non_stream(contents, self._endpoint, max_tokens=max_tokens, stop=stop, temperature=temperature)
return self._generate_stream(contents, self._endpoint, max_tokens=max_tokens, stop=stop, temperature=temperature, response_format=response_format)
return await self._generate_non_stream(contents, self._endpoint, max_tokens=max_tokens, stop=stop, temperature=temperature, response_format=response_format)

async def _generate_stream(
self,
@@ -470,6 +481,7 @@ async def _generate_stream(
max_tokens: Optional[int] = None,
temperature: Optional[float] = 1.0,
stop: Optional[List[str]] = None,
response_format: Optional[dict] = {"type": "text"},
) -> AsyncGenerator[str, None]:
try:
async_stream = await self.client.chat.completions.create(
@@ -479,6 +491,7 @@ async def _generate_stream(
temperature=temperature,
stop=stop,
stream=True,
response_format=response_format,
)
async for chunk in async_stream: # type: ignore[union-attr]
self.set_provider(chunk.model.split("@")[-1])
@@ -493,6 +506,7 @@ async def _generate_non_stream(
max_tokens: Optional[int] = None,
temperature: Optional[float] = 1.0,
stop: Optional[List[str]] = None,
response_format: Optional[dict] = {"type": "text"},
) -> str:
try:
async_response = await self.client.chat.completions.create(
@@ -502,6 +516,7 @@ async def _generate_non_stream(
temperature=temperature,
stop=stop,
stream=False,
response_format=response_format,
)
self.set_provider(async_response.model.split("@")[-1]) # type: ignore
return async_response.choices[0].message.content.strip(" ") # type: ignore # noqa: E501, WPS219
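
A matching async sketch for the async client path changed above; the `AsyncUnify` class name, endpoint, and API key are again illustrative assumptions, while `response_format` is the parameter added by this PR.

# Hedged async sketch; constructor arguments are assumed, response_format is from this PR.
import asyncio
from unify import AsyncUnify

async def main() -> None:
    client = AsyncUnify(endpoint="gpt-4o@openai", api_key="UNIFY_KEY")  # assumed constructor
    reply = await client.generate(
        user_prompt="Answer in JSON with a single key 'summary'.",
        response_format={"type": "json_object"},  # new parameter; the prompt must also ask for JSON
    )
    print(reply)

asyncio.run(main())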