diff --git a/unify/clients.py b/unify/clients.py
index ea0de13..fe8840c 100644
--- a/unify/clients.py
+++ b/unify/clients.py
@@ -1,5 +1,4 @@
 from typing import AsyncGenerator, Dict, Generator, List, Optional, Union
-
 import openai
 import requests
 from unify.exceptions import BadRequestError, UnifyError, status_error_map
@@ -130,6 +129,7 @@ def generate(  # noqa: WPS234, WPS211
         temperature: Optional[float] = 1.0,
         stop: Optional[List[str]] = None,
         stream: bool = False,
+        response_format: Optional[dict] = {"type": "text"},
     ) -> Union[Generator[str, None, None], str]:  # noqa: DAR101, DAR201, DAR401
         """Generate content using the Unify API.
 
@@ -157,6 +157,9 @@ def generate(  # noqa: WPS234, WPS211
                 If False, generates content as a single response.
                 Defaults to False.
 
+            response_format (Optional[dict]): The response format, used for OpenAI models.
+                Sent as response_format={"type": format} in the request body, where format
+                is either "text" or "json_object". JSON mode also requires explicitly asking for JSON in the prompt.
         Returns:
             Union[Generator[str, None, None], str]: If stream is True,
                  returns a generator yielding chunks of content.
@@ -179,11 +182,11 @@ def generate(  # noqa: WPS234, WPS211
             return self._generate_stream(contents, self._endpoint,
                                          max_tokens=max_tokens,
                                          temperature=temperature,
-                                         stop=stop)
+                                         stop=stop, response_format=response_format)
         return self._generate_non_stream(contents, self._endpoint,
                                          max_tokens=max_tokens,
                                          temperature=temperature,
-                                         stop=stop)
+                                         stop=stop, response_format=response_format)
 
     def get_credit_balance(self) -> float:  # noqa: DAR201, DAR401
@@ -218,6 +221,7 @@ def _generate_stream(
         max_tokens: Optional[int] = 1024,
         temperature: Optional[float] = 1.0,
         stop: Optional[List[str]] = None,
+        response_format: Optional[dict] = {"type": "text"},
     ) -> Generator[str, None, None]:
         try:
             chat_completion = self.client.chat.completions.create(
@@ -227,6 +231,7 @@ def _generate_stream(
                 temperature=temperature,
                 stop=stop,
                 stream=True,
+                response_format=response_format,
             )
             for chunk in chat_completion:
                 content = chunk.choices[0].delta.content  # type: ignore[union-attr]
@@ -243,6 +248,7 @@ def _generate_non_stream(
         max_tokens: Optional[int] = 1024,
         temperature: Optional[float] = 1.0,
         stop: Optional[List[str]] = None,
+        response_format: Optional[dict] = {"type": "text"},
     ) -> str:
         try:
             chat_completion = self.client.chat.completions.create(
@@ -252,7 +258,7 @@ def _generate_non_stream(
                 temperature=temperature,
                 stop=stop,
                 stream=False,
-            )
+                response_format=response_format)
             self.set_provider(
                 chat_completion.model.split(  # type: ignore[union-attr]
                     "@",
@@ -413,6 +419,7 @@ async def generate(  # noqa: WPS234, WPS211
         temperature: Optional[float] = 1.0,
         stop: Optional[List[str]] = None,
         stream: bool = False,
+        response_format: Optional[dict] = {"type": "text"},
     ) -> Union[AsyncGenerator[str, None], str]:  # noqa: DAR101, DAR201, DAR401
         """Generate content asynchronously using the Unify API.
 
@@ -440,6 +447,10 @@ async def generate(  # noqa: WPS234, WPS211
                 If False, generates content as a single response.
                 Defaults to False.
 
+            response_format (Optional[dict]): The response format, used for OpenAI models.
+                Sent as response_format={"type": format} in the request body, where format
+                is either "text" or "json_object". JSON mode also requires explicitly asking for JSON in the prompt.
+
         Returns:
             Union[AsyncGenerator[str, None], List[str]]: If stream is True, returns an
                 asynchronous generator yielding chunks of content.
@@ -460,8 +471,8 @@ async def generate(  # noqa: WPS234, WPS211
             raise UnifyError("You must provide either the user_prompt or messages!")
 
         if stream:
-            return self._generate_stream(contents, self._endpoint, max_tokens=max_tokens, stop=stop, temperature=temperature)
-        return await self._generate_non_stream(contents, self._endpoint, max_tokens=max_tokens, stop=stop, temperature=temperature)
+            return self._generate_stream(contents, self._endpoint, max_tokens=max_tokens, stop=stop, temperature=temperature, response_format=response_format)
+        return await self._generate_non_stream(contents, self._endpoint, max_tokens=max_tokens, stop=stop, temperature=temperature, response_format=response_format)
 
     async def _generate_stream(
         self,
@@ -470,6 +481,7 @@ async def _generate_stream(
         max_tokens: Optional[int] = None,
         temperature: Optional[float] = 1.0,
         stop: Optional[List[str]] = None,
+        response_format: Optional[dict] = {"type": "text"},
     ) -> AsyncGenerator[str, None]:
         try:
             async_stream = await self.client.chat.completions.create(
@@ -479,6 +491,7 @@ async def _generate_stream(
                 temperature=temperature,
                 stop=stop,
                 stream=True,
+                response_format=response_format,
             )
             async for chunk in async_stream:  # type: ignore[union-attr]
                 self.set_provider(chunk.model.split("@")[-1])
@@ -493,6 +506,7 @@ async def _generate_non_stream(
         max_tokens: Optional[int] = None,
         temperature: Optional[float] = 1.0,
         stop: Optional[List[str]] = None,
+        response_format: Optional[dict] = {"type": "text"},
     ) -> str:
         try:
             async_response = await self.client.chat.completions.create(
@@ -502,6 +516,7 @@ async def _generate_non_stream(
                 temperature=temperature,
                 stop=stop,
                 stream=False,
+                response_format=response_format,
             )
             self.set_provider(async_response.model.split("@")[-1])  # type: ignore
             return async_response.choices[0].message.content.strip(" ")  # type: ignore  # noqa: E501, WPS219
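
For reference, a minimal usage sketch of the new parameter, assuming a Unify API key is
available to the client; the endpoint name below is purely illustrative:

    from unify import Unify

    # Endpoint string is an example; any OpenAI-backed endpoint should accept response_format.
    client = Unify(endpoint="gpt-4o@openai")

    # JSON mode: per the docstring, the prompt itself must also explicitly ask for JSON.
    result = client.generate(
        user_prompt="Return a JSON object with keys 'city' and 'country' for Paris.",
        response_format={"type": "json_object"},
    )
    print(result)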