diff --git a/llm_client/llm_api_client/ai21_client.py b/llm_client/llm_api_client/ai21_client.py
index 2df8988..49d61e6 100644
--- a/llm_client/llm_api_client/ai21_client.py
+++ b/llm_client/llm_api_client/ai21_client.py
@@ -22,9 +22,10 @@ def __init__(self, config: LLMAPIClientConfig):
         self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key
 
     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: int = 16,
-                               temperature: float = 0.7, **kwargs) -> list[str]:
+                               temperature: float = 0.7, top_p: float = 1, **kwargs) -> list[str]:
         model = model or self._default_model
         kwargs[PROMPT_KEY] = prompt
+        kwargs["topP"] = kwargs.pop("topP", top_p)
         kwargs["maxTokens"] = kwargs.pop("maxTokens", max_tokens)
         kwargs["temperature"] = temperature
         response = await self._session.post(self._base_url + model + "/" + COMPLETE_PATH,
diff --git a/llm_client/llm_api_client/aleph_alpha_client.py b/llm_client/llm_api_client/aleph_alpha_client.py
index 1b23207..1aaff3e 100644
--- a/llm_client/llm_api_client/aleph_alpha_client.py
+++ b/llm_client/llm_api_client/aleph_alpha_client.py
@@ -27,12 +27,13 @@ def __init__(self, config: LLMAPIClientConfig):
         self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key
 
     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
-                               temperature: float = 0, **kwargs) -> \
+                               temperature: float = 0, top_p: float = 0, **kwargs) -> \
             list[str]:
         self._set_model_in_kwargs(kwargs, model)
         if max_tokens is None:
             raise ValueError("max_tokens must be specified")
         kwargs[PROMPT_KEY] = prompt
+        kwargs["top_p"] = top_p
         kwargs["maximum_tokens"] = kwargs.pop("maximum_tokens", max_tokens)
         kwargs["temperature"] = temperature
         response = await self._session.post(self._base_url + COMPLETE_PATH,
diff --git a/llm_client/llm_api_client/anthropic_client.py b/llm_client/llm_api_client/anthropic_client.py
index 31a1070..644a866 100644
--- a/llm_client/llm_api_client/anthropic_client.py
+++ b/llm_client/llm_api_client/anthropic_client.py
@@ -27,11 +27,13 @@ def __init__(self, config: LLMAPIClientConfig):
         self._headers[AUTH_HEADER] = self._api_key
 
     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
-                               temperature: float = 1,
+                               temperature: float = 1, top_p: Optional[float] = None,
                                **kwargs) -> \
             list[str]:
         if max_tokens is None and kwargs.get(MAX_TOKENS_KEY) is None:
             raise ValueError(f"max_tokens or {MAX_TOKENS_KEY} must be specified")
+        if top_p:
+            kwargs["top_p"] = top_p
         self._set_model_in_kwargs(kwargs, model)
         kwargs[PROMPT_KEY] = prompt
         kwargs[MAX_TOKENS_KEY] = kwargs.pop(MAX_TOKENS_KEY, max_tokens)
diff --git a/llm_client/llm_api_client/base_llm_api_client.py b/llm_client/llm_api_client/base_llm_api_client.py
index dc092d8..3881d94 100644
--- a/llm_client/llm_api_client/base_llm_api_client.py
+++ b/llm_client/llm_api_client/base_llm_api_client.py
@@ -30,7 +30,7 @@ def __init__(self, config: LLMAPIClientConfig):
 
     @abstractmethod
     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
-                               temperature: Optional[float] = None, **kwargs) -> list[str]:
+                               temperature: Optional[float] = None, top_p: Optional[float] = None, **kwargs) -> list[str]:
         raise NotImplementedError()
 
     async def embedding(self, text: str, model: Optional[str] = None, **kwargs) -> list[float]:
diff --git a/llm_client/llm_api_client/google_client.py b/llm_client/llm_api_client/google_client.py
index 8a93f79..6a11338 100644
--- a/llm_client/llm_api_client/google_client.py
+++ b/llm_client/llm_api_client/google_client.py
@@ -33,10 +33,12 @@ def __init__(self, config: LLMAPIClientConfig):
         self._params = {AUTH_PARAM: self._api_key}
 
     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = 64,
-                               temperature: Optional[float] = None, **kwargs) -> list[str]:
+                               temperature: Optional[float] = None, top_p: Optional[float] = None, **kwargs) -> list[str]:
         model = model or self._default_model
         kwargs[PROMPT_KEY] = {TEXT_KEY: prompt}
         kwargs[MAX_TOKENS_KEY] = kwargs.pop(MAX_TOKENS_KEY, max_tokens)
+        if top_p:
+            kwargs["topP"] = top_p
         kwargs["temperature"] = kwargs.pop("temperature", temperature)
         response = await self._session.post(self._base_url + model + ":" + COMPLETE_PATH,
                                             params=self._params,
diff --git a/llm_client/llm_api_client/huggingface_client.py b/llm_client/llm_api_client/huggingface_client.py
index b1e4d59..f080ebe 100644
--- a/llm_client/llm_api_client/huggingface_client.py
+++ b/llm_client/llm_api_client/huggingface_client.py
@@ -29,8 +29,9 @@ def __init__(self, config: LLMAPIClientConfig):
         self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key
 
     async def text_completion(self, prompt: str, max_tokens: Optional[int] = None, temperature: float = 1.0,
-                               model: Optional[str] = None, **kwargs) -> list[str]:
+                               model: Optional[str] = None, top_p: Optional[float] = None, **kwargs) -> list[str]:
         model = model or self._default_model
+        kwargs["top_p"] = top_p
         kwargs[INPUT_KEY] = prompt
         kwargs[TEMPERATURE_KEY] = temperature
         kwargs[TOKENS_KEY] = kwargs.pop(TOKENS_KEY, max_tokens)
diff --git a/llm_client/llm_api_client/openai_client.py b/llm_client/llm_api_client/openai_client.py
index 0ee14cd..8dda1e3 100644
--- a/llm_client/llm_api_client/openai_client.py
+++ b/llm_client/llm_api_client/openai_client.py
@@ -36,19 +36,21 @@ def __init__(self, config: LLMAPIClientConfig):
         self._client = openai
 
     async def text_completion(self, prompt: str, model: Optional[str] = None, temperature: float = 0,
-                               max_tokens: int = 16, **kwargs) -> list[str]:
+                               max_tokens: int = 16, top_p: float = 1, **kwargs) -> list[str]:
         self._set_model_in_kwargs(kwargs, model)
         kwargs[PROMPT_KEY] = prompt
+        kwargs["top_p"] = top_p
         kwargs["temperature"] = temperature
         kwargs["max_tokens"] = max_tokens
         completions = await self._client.Completion.acreate(headers=self._headers, **kwargs)
         return [choice.text for choice in completions.choices]
 
     async def chat_completion(self, messages: list[ChatMessage], temperature: float = 0,
-                               max_tokens: int = 16, model: Optional[str] = None, **kwargs) -> list[str]:
+                               max_tokens: int = 16, top_p: float = 1, model: Optional[str] = None, **kwargs) -> list[str]:
         self._set_model_in_kwargs(kwargs, model)
         kwargs["messages"] = [message.to_dict() for message in messages]
         kwargs["temperature"] = temperature
+        kwargs["top_p"] = top_p
         kwargs["max_tokens"] = max_tokens
         completions = await self._client.ChatCompletion.acreate(headers=self._headers, **kwargs)
         return [choice.message.content for choice in completions.choices]
diff --git a/tests/llm_api_client/ai21_client/test_ai21.py b/tests/llm_api_client/ai21_client/test_ai21.py
index b21652d..639d112 100644
--- a/tests/llm_api_client/ai21_client/test_ai21.py
+++ b/tests/llm_api_client/ai21_client/test_ai21.py
@@ -30,7 +30,7 @@ async def test_text_completion__sanity(mock_aioresponse, llm_client, url):
                       'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7 },
+                                             json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7, "topP" : 1 },
                                              raise_for_status=True)
@@ -49,7 +49,7 @@ async def test_text_completion__return_multiple_completions(mock_aioresponse, llm_client, url):
                       ]
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7 },
+                                             json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7, "topP" : 1 },
                                              raise_for_status=True)
@@ -69,7 +69,7 @@ async def test_text_completion__override_model(mock_aioresponse, llm_client):
                       'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7 },
+                                             json={'prompt': 'These are a few of my favorite', "maxTokens" : 16, "temperature" : 0.7, "topP" : 1 },
                                              raise_for_status=True)
@@ -87,7 +87,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, url):
                       'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'prompt': 'These are a few of my favorite', "maxTokens" : 10, "temperature" : 0.7 },
+                                             json={'prompt': 'These are a few of my favorite', "maxTokens" : 10, "temperature" : 0.7, "topP" : 1 },
                                              raise_for_status=True)
diff --git a/tests/llm_api_client/anthropic_client/test_anthropic_client.py b/tests/llm_api_client/anthropic_client/test_anthropic_client.py
index cdadf56..1702794 100644
--- a/tests/llm_api_client/anthropic_client/test_anthropic_client.py
+++ b/tests/llm_api_client/anthropic_client/test_anthropic_client.py
@@ -14,7 +14,6 @@ async def test_get_llm_api_client__with_anthropic(config):
     assert isinstance(actual, AnthropicClient)
 
 
-
 @pytest.mark.asyncio
 async def test_text_completion__sanity(mock_aioresponse, llm_client, complete_url, anthropic_version):
     mock_aioresponse.post(
@@ -22,7 +21,7 @@ async def test_text_completion__sanity(mock_aioresponse, llm_client, complete_url, anthropic_version):
         payload={COMPLETIONS_KEY: "completion text"}
     )
 
-    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10)
+    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10,)
 
     assert actual == ["completion text"]
     mock_aioresponse.assert_called_once_with(complete_url, method='POST',
@@ -92,7 +91,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, complete_url, anthropic_version):
         payload={COMPLETIONS_KEY: "completion text"}
     )
 
-    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10, temperature=0.5)
+    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10, temperature=0.5, top_p=0.5)
 
     assert actual == ["completion text"]
     mock_aioresponse.assert_called_once_with(complete_url, method='POST',
@@ -102,7 +101,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, complete_url, anthropic_version):
                                              json={PROMPT_KEY: 'These are a few of my favorite',
                                                    MAX_TOKENS_KEY: 10,
                                                    MODEL_KEY: llm_client._default_model,
-                                                   "temperature": 0.5},
+                                                   "temperature": 0.5, "top_p": 0.5},
                                              raise_for_status=True)
diff --git a/tests/llm_api_client/google_client/test_google_client.py b/tests/llm_api_client/google_client/test_google_client.py
index d404a2c..9d39faa 100644
--- a/tests/llm_api_client/google_client/test_google_client.py
+++ b/tests/llm_api_client/google_client/test_google_client.py
@@ -68,7 +68,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, params):
         payload=load_json_resource("google/text_completion.json")
     )
 
-    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10, blabla="aaa")
+    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10, blabla="aaa", top_p=0.95)
 
     assert actual == ['Once upon a time, there was a young girl named Lily...',
                       'Once upon a time, there was a young boy named Billy...']
@@ -76,7 +76,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, params):
         json={PROMPT_KEY: {TEXT_KEY: 'These are a few of my favorite'},
               MAX_TOKENS_KEY: 10,
               'temperature': None,
-              'blabla': 'aaa'},
+              'blabla': 'aaa', "topP": 0.95},
         headers=llm_client._headers,
         raise_for_status=True,
     )
diff --git a/tests/llm_api_client/huggingface_client/test_huggingface.py b/tests/llm_api_client/huggingface_client/test_huggingface.py
index bbb12ed..3a32182 100644
--- a/tests/llm_api_client/huggingface_client/test_huggingface.py
+++ b/tests/llm_api_client/huggingface_client/test_huggingface.py
@@ -28,7 +28,7 @@ async def test_text_completion__sanity(mock_aioresponse, llm_client, url):
     assert actual == ['Kobe Bryant is a retired professional basketball player who played for the Los Angeles Lakers of']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'inputs': 'who is kobe bryant',"max_length": None, "temperature": 1.0},
+                                             json={'inputs': 'who is kobe bryant',"max_length": None, "temperature": 1.0, "top_p": None},
                                              raise_for_status=True)
@@ -44,7 +44,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, url):
     assert actual == ['Kobe Bryant is a retired professional basketball player who played for the Los Angeles Lakers of']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'inputs': 'who is kobe bryant',"max_length": 10, "temperature": 1.0},
+                                             json={'inputs': 'who is kobe bryant',"max_length": 10, "temperature": 1.0, "top_p": None},
                                              raise_for_status=True)
diff --git a/tests/llm_api_client/openai_client/test_openai.py b/tests/llm_api_client/openai_client/test_openai.py
index 28175bb..f1a80ad 100644
--- a/tests/llm_api_client/openai_client/test_openai.py
+++ b/tests/llm_api_client/openai_client/test_openai.py
@@ -37,7 +37,7 @@ async def test_text_completion__sanity(openai_mock, open_ai_client, model_name):
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=model_name,
         prompt="These are a few of my favorite",
-        headers={},temperature=0,max_tokens=16)
+        headers={},temperature=0,max_tokens=16,top_p=1)
 
 
 @pytest.mark.asyncio
@@ -52,7 +52,7 @@ async def test_text_completion__return_multiple_completions(openai_mock, open_ai_client, model_name):
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=model_name,
         prompt="These are a few of my favorite",
-        headers={},temperature=0,max_tokens=16)
+        headers={},temperature=0,max_tokens=16,top_p=1)
 
 
 @pytest.mark.asyncio
@@ -67,7 +67,7 @@ async def test_text_completion__override_model(openai_mock, open_ai_client, model_name):
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=new_model_name,
         prompt="These are a few of my favorite",
-        headers={},temperature=0,max_tokens=16)
+        headers={},temperature=0,max_tokens=16,top_p=1)
 
 
 @pytest.mark.asyncio
@@ -81,7 +81,7 @@ async def test_text_completion__with_kwargs(openai_mock, open_ai_client, model_name):
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=model_name,
         prompt="These are a few of my favorite",
-        temperature=0,max_tokens=10,
+        temperature=0,max_tokens=10,top_p=1,
         headers={})
 
 
@pytest.mark.asyncio
@@ -98,7 +98,7 @@ async def test_text_completion__with_headers(openai_mock, model_name):
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=model_name,
         prompt="These are a few of my favorite",
-        headers={"header_name": "header_value"},temperature=0,max_tokens=16)
+        headers={"header_name": "header_value"},temperature=0,max_tokens=16,top_p=1)
 
 
 @pytest.mark.asyncio
@@ -112,7 +112,7 @@ async def test_chat_completion__sanity(openai_mock, open_ai_client, model_name):
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
-        headers={},temperature=0,max_tokens=16)
+        headers={},temperature=0,max_tokens=16,top_p=1)
 
 
 @pytest.mark.asyncio
@@ -127,7 +127,7 @@ async def test_chat_completion__return_multiple_completions(openai_mock, open_ai_client, model_name):
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
-        headers={},temperature=0,max_tokens=16)
+        headers={},temperature=0,max_tokens=16,top_p=1)
 
 
 @pytest.mark.asyncio
@@ -142,7 +142,7 @@ async def test_chat_completion__override_model(openai_mock, open_ai_client, model_name):
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=new_model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
-        headers={},temperature=0,max_tokens=16)
+        headers={},temperature=0,max_tokens=16,top_p=1)
 
 
 @pytest.mark.asyncio
@@ -150,14 +150,14 @@ async def test_chat_completion__with_kwargs(openai_mock, open_ai_client, model_name):
     openai_mock.ChatCompletion.acreate = AsyncMock(
         return_value=OpenAIObject.construct_from(load_json_resource("openai/chat_completion.json")))
 
-    actual = await open_ai_client.chat_completion([ChatMessage(Role.USER, "Hello!")], max_tokens=10)
+    actual = await open_ai_client.chat_completion([ChatMessage(Role.USER, "Hello!")], max_tokens=10, top_p=1)
 
     assert actual == ["\n\nHello there, how may I assist you today?"]
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
         max_tokens=10,
-        headers={},temperature=0)
+        headers={},temperature=0,top_p=1)
 
 
 @pytest.mark.asyncio
@@ -173,7 +173,7 @@ async def test_chat_completion__with_headers(openai_mock, model_name):
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
-        headers={"header_name": "header_value"},temperature=0,max_tokens=16)
+        headers={"header_name": "header_value"},temperature=0,max_tokens=16,top_p=1)
 
 
 @pytest.mark.asyncio
diff --git a/tests/resources/openai/chat_completion.json b/tests/resources/openai/chat_completion.json
index 6dfcf5c..60a0bf3 100644
--- a/tests/resources/openai/chat_completion.json
+++ b/tests/resources/openai/chat_completion.json
@@ -13,6 +13,5 @@
   "usage": {
     "prompt_tokens": 9,
     "completion_tokens": 12,
-    "total_tokens": 21
-  }
+    "total_tokens": 21}
 }
diff --git a/tests/resources/openai/text_completion.json b/tests/resources/openai/text_completion.json
index 3c37d13..18a1b3e 100644
--- a/tests/resources/openai/text_completion.json
+++ b/tests/resources/openai/text_completion.json
@@ -14,6 +14,5 @@
   "usage": {
     "prompt_tokens": 5,
     "completion_tokens": 7,
-    "total_tokens": 12
-  }
+    "total_tokens": 12}
 }