Added Top_p (#29), addressing https://github.com/uripeled2/llm-client-sdk/issues/28

Merged: 9 commits, Jul 14, 2023
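This PR threads a nucleus-sampling parameter, top_p, through text_completion in every client (and through OpenAI's chat_completion), mapping it onto each provider's native key: topP for AI21 and Google, top_p for the rest. A minimal usage sketch follows; the OpenAIClient class name, the import paths, and the LLMAPIClientConfig fields are assumptions for illustration, while the text_completion signature itself comes from the diffs below.

import asyncio

from llm_client import LLMAPIClientConfig
from llm_client.llm_api_client.openai_client import OpenAIClient

async def main():
    # Config fields here are illustrative assumptions, not taken from this PR.
    config = LLMAPIClientConfig(api_key="sk-...", default_model="text-davinci-003")
    client = OpenAIClient(config)
    # top_p=0.9 samples from the smallest token set whose cumulative
    # probability reaches 0.9; top_p=1 (the new default) disables truncation.
    completions = await client.text_completion(
        prompt="These are a few of my favorite",
        max_tokens=16,
        temperature=0.7,
        top_p=0.9,
    )
    print(completions[0])

asyncio.run(main())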
llm_client/llm_api_client/ai21_client.py (2 additions, 1 deletion)

@@ -22,9 +22,10 @@ def __init__(self, config: LLMAPIClientConfig):
         self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key

     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: int = 16,
-                              temperature: float = 0.7, **kwargs) -> list[str]:
+                              temperature: float = 0.7, top_p: float = 1, **kwargs) -> list[str]:
         model = model or self._default_model
         kwargs[PROMPT_KEY] = prompt
+        kwargs["topP"] = kwargs.pop("topP", top_p)
         kwargs["maxTokens"] = kwargs.pop("maxTokens", max_tokens)
         kwargs["temperature"] = temperature
         response = await self._session.post(self._base_url + model + "/" + COMPLETE_PATH,
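Note the kwargs.pop("topP", top_p) idiom above, the same one already used for maxTokens: a provider-native key passed through **kwargs takes precedence over the top_p keyword, so callers that already set topP directly keep working. A standalone sketch of that precedence, illustrative only:

def merge_top_p(top_p: float = 1, **kwargs) -> dict:
    # Keep a caller-supplied provider-native "topP" if present;
    # otherwise fall back to the top_p keyword argument.
    kwargs["topP"] = kwargs.pop("topP", top_p)
    return kwargs

assert merge_top_p() == {"topP": 1}                        # default applied
assert merge_top_p(top_p=0.9) == {"topP": 0.9}             # keyword forwarded
assert merge_top_p(top_p=0.9, topP=0.5) == {"topP": 0.5}   # explicit key wins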
llm_client/llm_api_client/aleph_alpha_client.py (2 additions, 1 deletion)

@@ -27,12 +27,13 @@ def __init__(self, config: LLMAPIClientConfig):
         self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key

     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
-                              temperature: float = 0, **kwargs) -> \
+                              temperature: float = 0, top_p: float = 0, **kwargs) -> \
             list[str]:
         self._set_model_in_kwargs(kwargs, model)
         if max_tokens is None:
             raise ValueError("max_tokens must be specified")
         kwargs[PROMPT_KEY] = prompt
+        kwargs["top_p"] = top_p
         kwargs["maximum_tokens"] = kwargs.pop("maximum_tokens", max_tokens)
         kwargs["temperature"] = temperature
         response = await self._session.post(self._base_url + COMPLETE_PATH,
llm_client/llm_api_client/anthropic_client.py (3 additions, 1 deletion)

@@ -27,11 +27,13 @@ def __init__(self, config: LLMAPIClientConfig):
         self._headers[AUTH_HEADER] = self._api_key

     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
-                              temperature: float = 1,
+                              temperature: float = 1, top_p: Optional[float] = None,
                               **kwargs) -> \
             list[str]:
         if max_tokens is None and kwargs.get(MAX_TOKENS_KEY) is None:
             raise ValueError(f"max_tokens or {MAX_TOKENS_KEY} must be specified")
+        if top_p:
+            kwargs["top_p"] = top_p
         self._set_model_in_kwargs(kwargs, model)
         kwargs[PROMPT_KEY] = prompt
         kwargs[MAX_TOKENS_KEY] = kwargs.pop(MAX_TOKENS_KEY, max_tokens)
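One subtlety in this hunk (and in the Google hunk below): if top_p: tests truthiness, so a caller passing top_p=0.0 is silently dropped. If zero should be a legal value, an is not None check forwards it; a small sketch of the difference, not part of this PR:

top_p = 0.0
kwargs = {}

if top_p:                 # 0.0 is falsy, so the key is never set
    kwargs["top_p"] = top_p
assert "top_p" not in kwargs

if top_p is not None:     # explicit None check forwards 0.0
    kwargs["top_p"] = top_p
assert kwargs["top_p"] == 0.0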
llm_client/llm_api_client/base_llm_api_client.py (1 addition, 1 deletion)

@@ -30,7 +30,7 @@ def __init__(self, config: LLMAPIClientConfig):

     @abstractmethod
     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = None,
-                              temperature: Optional[float] = None, **kwargs) -> list[str]:
+                              temperature: Optional[float] = None, top_p: Optional[float] = None, **kwargs) -> list[str]:
         raise NotImplementedError()

     async def embedding(self, text: str, model: Optional[str] = None, **kwargs) -> list[float]:
llm_client/llm_api_client/google_client.py (3 additions, 1 deletion)

@@ -33,10 +33,12 @@ def __init__(self, config: LLMAPIClientConfig):
         self._params = {AUTH_PARAM: self._api_key}

     async def text_completion(self, prompt: str, model: Optional[str] = None, max_tokens: Optional[int] = 64,
-                              temperature: Optional[float] = None, **kwargs) -> list[str]:
+                              temperature: Optional[float] = None, top_p: Optional[float] = None, **kwargs) -> list[str]:
         model = model or self._default_model
         kwargs[PROMPT_KEY] = {TEXT_KEY: prompt}
         kwargs[MAX_TOKENS_KEY] = kwargs.pop(MAX_TOKENS_KEY, max_tokens)
+        if top_p:
+            kwargs["topP"] = top_p
         kwargs["temperature"] = kwargs.pop("temperature", temperature)
         response = await self._session.post(self._base_url + model + ":" + COMPLETE_PATH,
                                             params=self._params,
llm_client/llm_api_client/huggingface_client.py (2 additions, 1 deletion)

@@ -29,8 +29,9 @@ def __init__(self, config: LLMAPIClientConfig):
         self._headers[AUTH_HEADER] = BEARER_TOKEN + self._api_key

     async def text_completion(self, prompt: str, max_tokens: Optional[int] = None, temperature: float = 1.0,
-                              model: Optional[str] = None, **kwargs) -> list[str]:
+                              model: Optional[str] = None, top_p: Optional[float] = None, **kwargs) -> list[str]:
         model = model or self._default_model
+        kwargs["top_p"] = top_p
         kwargs[INPUT_KEY] = prompt
         kwargs[TEMPERATURE_KEY] = temperature
         kwargs[TOKENS_KEY] = kwargs.pop(TOKENS_KEY, max_tokens)
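Unlike the Anthropic and Google clients, this hunk sets kwargs["top_p"] unconditionally, so an unset value is serialized as null in the request body (the updated Hugging Face tests below assert "top_p": None in the expected JSON). If the endpoint ever rejected null parameters, one option would be to drop None-valued keys before posting; a hypothetical filter, not part of this PR:

payload = {"inputs": "who is kobe bryant", "max_length": 10,
           "temperature": 1.0, "top_p": None}
# Omit keys whose value is None so they never reach the JSON body.
payload = {key: value for key, value in payload.items() if value is not None}
assert "top_p" not in payload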
llm_client/llm_api_client/openai_client.py (4 additions, 2 deletions)

@@ -36,19 +36,21 @@ def __init__(self, config: LLMAPIClientConfig):
         self._client = openai

     async def text_completion(self, prompt: str, model: Optional[str] = None, temperature: float = 0,
-                              max_tokens: int = 16, **kwargs) -> list[str]:
+                              max_tokens: int = 16, top_p: float = 1, **kwargs) -> list[str]:
         self._set_model_in_kwargs(kwargs, model)
         kwargs[PROMPT_KEY] = prompt
+        kwargs["top_p"] = top_p
         kwargs["temperature"] = temperature
         kwargs["max_tokens"] = max_tokens
         completions = await self._client.Completion.acreate(headers=self._headers, **kwargs)
         return [choice.text for choice in completions.choices]

     async def chat_completion(self, messages: list[ChatMessage], temperature: float = 0,
-                              max_tokens: int = 16, model: Optional[str] = None, **kwargs) -> list[str]:
+                              max_tokens: int = 16, top_p: float = 1, model: Optional[str] = None, **kwargs) -> list[str]:
         self._set_model_in_kwargs(kwargs, model)
         kwargs["messages"] = [message.to_dict() for message in messages]
         kwargs["temperature"] = temperature
+        kwargs["top_p"] = top_p
         kwargs["max_tokens"] = max_tokens
         completions = await self._client.ChatCompletion.acreate(headers=self._headers, **kwargs)
         return [choice.message.content for choice in completions.choices]
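Both OpenAI methods now accept top_p with a default of 1, i.e. no nucleus truncation, and forward it on every call. A chat-side sketch under the same assumptions as the earlier example; ChatMessage and Role appear in this PR's tests, but their import path here is an assumption:

from llm_client import ChatMessage, Role

async def greet(client) -> list[str]:
    # temperature=0 with top_p=1 keeps decoding effectively greedy;
    # lowering top_p instead of temperature is the other sampling knob.
    return await client.chat_completion(
        [ChatMessage(Role.USER, "Hello!")],
        max_tokens=16,
        top_p=1,
    )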
tests/llm_api_client/ai21_client/test_ai21.py (4 additions, 4 deletions)

@@ -30,7 +30,7 @@ async def test_text_completion__sanity(mock_aioresponse, llm_client, url):
                       'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'prompt': 'These are a few of my favorite', "maxTokens": 16, "temperature": 0.7},
+                                             json={'prompt': 'These are a few of my favorite', "maxTokens": 16, "temperature": 0.7, "topP": 1},
                                              raise_for_status=True)


@@ -49,7 +49,7 @@ async def test_text_completion__return_multiple_completions(mock_aioresponse, ll
                 ]
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'prompt': 'These are a few of my favorite', "maxTokens": 16, "temperature": 0.7},
+                                             json={'prompt': 'These are a few of my favorite', "maxTokens": 16, "temperature": 0.7, "topP": 1},
                                              raise_for_status=True)


@@ -69,7 +69,7 @@ async def test_text_completion__override_model(mock_aioresponse, llm_client):
                       'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'prompt': 'These are a few of my favorite', "maxTokens": 16, "temperature": 0.7},
+                                             json={'prompt': 'These are a few of my favorite', "maxTokens": 16, "temperature": 0.7, "topP": 1},
                                              raise_for_status=True)


@@ -87,7 +87,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, url):
                       'friends, entertaining family...you get the point! One of my favorite things to do is plan parties']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'prompt': 'These are a few of my favorite', "maxTokens": 10, "temperature": 0.7},
+                                             json={'prompt': 'These are a few of my favorite', "maxTokens": 10, "temperature": 0.7, "topP": 1},
                                              raise_for_status=True)
tests/llm_api_client/anthropic_client/test_anthropic.py (3 additions, 4 deletions)

@@ -14,15 +14,14 @@ async def test_get_llm_api_client__with_anthropic(config):

     assert isinstance(actual, AnthropicClient)

-
 @pytest.mark.asyncio
 async def test_text_completion__sanity(mock_aioresponse, llm_client, complete_url, anthropic_version):
     mock_aioresponse.post(
         complete_url,
         payload={COMPLETIONS_KEY: "completion text"}
     )

-    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10)
+    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10,)

     assert actual == ["completion text"]
     mock_aioresponse.assert_called_once_with(complete_url, method='POST',

@@ -92,7 +91,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, comple
         payload={COMPLETIONS_KEY: "completion text"}
     )

-    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10, temperature=0.5)
+    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10, temperature=0.5, top_p=0.5)

     assert actual == ["completion text"]
     mock_aioresponse.assert_called_once_with(complete_url, method='POST',

@@ -102,7 +101,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, comple
                                              json={PROMPT_KEY: 'These are a few of my favorite',
                                                    MAX_TOKENS_KEY: 10,
                                                    MODEL_KEY: llm_client._default_model,
-                                                   "temperature": 0.5},
+                                                   "temperature": 0.5, "top_p": 0.5},
                                              raise_for_status=True)
tests/llm_api_client/google_client/test_google_client.py (2 additions, 2 deletions)

@@ -68,15 +68,15 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, params
         payload=load_json_resource("google/text_completion.json")
     )

-    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10, blabla="aaa")
+    actual = await llm_client.text_completion(prompt="These are a few of my favorite", max_tokens=10, blabla="aaa", top_p=0.95)

     assert actual == ['Once upon a time, there was a young girl named Lily...',
                       'Once upon a time, there was a young boy named Billy...']
     mock_aioresponse.assert_called_once_with(url, method='POST', params={AUTH_PARAM: llm_client._api_key},
                                              json={PROMPT_KEY: {TEXT_KEY: 'These are a few of my favorite'},
                                                    MAX_TOKENS_KEY: 10,
                                                    'temperature': None,
-                                                   'blabla': 'aaa'},
+                                                   'blabla': 'aaa', "topP": 0.95},
                                              headers=llm_client._headers,
                                              raise_for_status=True,
                                              )
tests/llm_api_client/huggingface_client/test_huggingface.py (2 additions, 2 deletions)

@@ -28,7 +28,7 @@ async def test_text_completion__sanity(mock_aioresponse, llm_client, url):
     assert actual == ['Kobe Bryant is a retired professional basketball player who played for the Los Angeles Lakers of']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'inputs': 'who is kobe bryant', "max_length": None, "temperature": 1.0},
+                                             json={'inputs': 'who is kobe bryant', "max_length": None, "temperature": 1.0, "top_p": None},
                                              raise_for_status=True)


@@ -44,7 +44,7 @@ async def test_text_completion__with_kwargs(mock_aioresponse, llm_client, url):
     assert actual == ['Kobe Bryant is a retired professional basketball player who played for the Los Angeles Lakers of']
     mock_aioresponse.assert_called_once_with(url, method='POST',
                                              headers={AUTH_HEADER: BEARER_TOKEN + llm_client._api_key},
-                                             json={'inputs': 'who is kobe bryant', "max_length": 10, "temperature": 1.0},
+                                             json={'inputs': 'who is kobe bryant', "max_length": 10, "temperature": 1.0, "top_p": None},
                                              raise_for_status=True)
tests/llm_api_client/openai_client/test_openai.py (11 additions, 11 deletions)

@@ -37,7 +37,7 @@ async def test_text_completion__sanity(openai_mock, open_ai_client, model_name):
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=model_name,
         prompt="These are a few of my favorite",
-        headers={}, temperature=0, max_tokens=16)
+        headers={}, temperature=0, max_tokens=16, top_p=1)


@@ -52,7 +52,7 @@ async def test_text_completion__return_multiple_completions(openai_mock, open_ai
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=model_name,
         prompt="These are a few of my favorite",
-        headers={}, temperature=0, max_tokens=16)
+        headers={}, temperature=0, max_tokens=16, top_p=1)


@@ -67,7 +67,7 @@ async def test_text_completion__override_model(openai_mock, open_ai_client, mode
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=new_model_name,
         prompt="These are a few of my favorite",
-        headers={}, temperature=0, max_tokens=16)
+        headers={}, temperature=0, max_tokens=16, top_p=1)


@@ -81,7 +81,7 @@ async def test_text_completion__with_kwargs(openai_mock, open_ai_client, model_n
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=model_name,
         prompt="These are a few of my favorite",
-        temperature=0, max_tokens=10,
+        temperature=0, max_tokens=10, top_p=1,
         headers={})


@@ -98,7 +98,7 @@ async def test_text_completion__with_headers(openai_mock, model_name):
     openai_mock.Completion.acreate.assert_awaited_once_with(
         model=model_name,
         prompt="These are a few of my favorite",
-        headers={"header_name": "header_value"}, temperature=0, max_tokens=16)
+        headers={"header_name": "header_value"}, temperature=0, max_tokens=16, top_p=1)


@@ -112,7 +112,7 @@ async def test_chat_completion__sanity(openai_mock, open_ai_client, model_name):
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
-        headers={}, temperature=0, max_tokens=16)
+        headers={}, temperature=0, max_tokens=16, top_p=1)


@@ -127,7 +127,7 @@ async def test_chat_completion__return_multiple_completions(openai_mock, open_ai
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
-        headers={}, temperature=0, max_tokens=16)
+        headers={}, temperature=0, max_tokens=16, top_p=1)


@@ -142,22 +142,22 @@ async def test_chat_completion__override_model(openai_mock, open_ai_client, mode
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=new_model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
-        headers={}, temperature=0, max_tokens=16)
+        headers={}, temperature=0, max_tokens=16, top_p=1)


 @pytest.mark.asyncio
 async def test_chat_completion__with_kwargs(openai_mock, open_ai_client, model_name):
     openai_mock.ChatCompletion.acreate = AsyncMock(
         return_value=OpenAIObject.construct_from(load_json_resource("openai/chat_completion.json")))

-    actual = await open_ai_client.chat_completion([ChatMessage(Role.USER, "Hello!")], max_tokens=10)
+    actual = await open_ai_client.chat_completion([ChatMessage(Role.USER, "Hello!")], max_tokens=10, top_p=1)

     assert actual == ["\n\nHello there, how may I assist you today?"]
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
         max_tokens=10,
-        headers={}, temperature=0)
+        headers={}, temperature=0, top_p=1)


 @pytest.mark.asyncio
@@ -173,7 +173,7 @@ async def test_chat_completion__with_headers(openai_mock, model_name):
     openai_mock.ChatCompletion.acreate.assert_awaited_once_with(
         model=model_name,
         messages=[{'content': 'Hello!', 'role': 'user'}],
-        headers={"header_name": "header_value"}, temperature=0, max_tokens=16)
+        headers={"header_name": "header_value"}, temperature=0, max_tokens=16, top_p=1)
tests/resources/openai/chat_completion.json (1 addition, 2 deletions)

@@ -13,6 +13,5 @@
   "usage": {
     "prompt_tokens": 9,
     "completion_tokens": 12,
-    "total_tokens": 21
-  }
+    "total_tokens": 21}
 }
tests/resources/openai/text_completion.json (1 addition, 2 deletions)

@@ -14,6 +14,5 @@
   "usage": {
     "prompt_tokens": 5,
     "completion_tokens": 7,
-    "total_tokens": 12
-  }
+    "total_tokens": 12}
 }