feat(functions): relax mixedgrammars (#2365)

* feat(functions): relax mixedgrammars Extend even more the functionalities and when mixed mode is enabled, tolerate also both strings and JSON in the result - in this case we make sure that the JSON can be correctly parsed. This also updates the examples and the gallery model to configure the grammar. The changeset also breaks current function/grammar configuration as it reserves now a stanza in the YAML config. For example: ```yaml function: grammar: # This allows the grammar to also return messages mixed_mode: true # Suffix to add to the grammar # prefix: '<tool_call>\n' # Force parallel calls in the grammar # parallel_calls: true ``` Signed-off-by: Ettore Di Giacinto <[email protected]> * refactor, add a way to disable mixed json and freestring Signed-off-by: Ettore Di Giacinto <[email protected]> * Fix linting issues Signed-off-by: Ettore Di Giacinto <[email protected]> --------- Signed-off-by: Ettore Di Giacinto <[email protected]>
mudler · May 21, 2024 · 491e1d7 · 491e1d7
1 parent 1542c58
commit 491e1d7
Show file tree

Hide file tree

Showing 10 changed files with 469 additions and 149 deletions.
diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
@@ -3,7 +3,61 @@ mmap: true
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
+stopwords:
+- "<|im_end|>"
+- "<dummy32000>"
+- "</tool_call>"
+- "<|eot_id|>"
+- "<|end_of_text|>"
+
+function:
+  # disable injecting the "answer" tool
+  disable_no_action: true
+
+  grammar:
+    # This allows the grammar to also return messages
+    mixed_mode: true
+    # Suffix to add to the grammar
+    #prefix: '<tool_call>\n'
+    # Force parallel calls in the grammar
+    # parallel_calls: true
+
+  return_name_in_function_response: true
+  # Without grammar uncomment the lines below
+  # Warning: this is relying only on the capability of the
+  # LLM model to generate the correct function call.
+  json_regex_match: 
+   - "(?s)<tool_call>(.*?)</tool_call>"
+   - "(?s)<tool_call>(.*?)"
+  replace_llm_results:
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+  replace_function_results: 
+  # Replace everything that is not JSON array or object
+  # 
+  - key: '(?s)^[^{\[]*'
+    value: ""
+  - key: '(?s)[^}\]]*$'
+    value: ""
+  - key: "'([^']*?)'"
+    value: "_DQUOTE_${1}_DQUOTE_"
+  - key: '\\"'
+    value: "__TEMP_QUOTE__"
+  - key: "\'"
+    value: "'"
+  - key: "_DQUOTE_"
+    value: '"'
+  - key: "__TEMP_QUOTE__"
+    value: '"'
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+
 template:
+  chat: |
+    {{.Input -}}
+    <|im_start|>assistant
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
     {{- if .FunctionCall }}
@@ -22,38 +76,25 @@ template:
     {{- else if eq .RoleName "tool" }}
     </tool_response>
     {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
+  completion: |
+    {{.Input}}
+  function: |-
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    You are a function calling AI model.
+    Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
     </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    You should call the tools provided to you sequentially
+    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+    <scratchpad>
+    {step-by-step reasoning and plan in bullet points}
+    </scratchpad>
+    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
     <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
+    {"arguments": <args-dict>, "name": <function-name>}
     </tool_call><|im_end|>
     {{.Input -}}
     <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- "\n</tool_call>"
-- "\n\n\n"
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
@@ -3,7 +3,61 @@ mmap: true
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
+stopwords:
+- "<|im_end|>"
+- "<dummy32000>"
+- "</tool_call>"
+- "<|eot_id|>"
+- "<|end_of_text|>"
+
+function:
+  # disable injecting the "answer" tool
+  disable_no_action: true
+
+  grammar:
+    # This allows the grammar to also return messages
+    mixed_mode: true
+    # Suffix to add to the grammar
+    #prefix: '<tool_call>\n'
+    # Force parallel calls in the grammar
+    # parallel_calls: true
+
+  return_name_in_function_response: true
+  # Without grammar uncomment the lines below
+  # Warning: this is relying only on the capability of the
+  # LLM model to generate the correct function call.
+  json_regex_match: 
+   - "(?s)<tool_call>(.*?)</tool_call>"
+   - "(?s)<tool_call>(.*?)"
+  replace_llm_results:
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+  replace_function_results: 
+  # Replace everything that is not JSON array or object
+  # 
+  - key: '(?s)^[^{\[]*'
+    value: ""
+  - key: '(?s)[^}\]]*$'
+    value: ""
+  - key: "'([^']*?)'"
+    value: "_DQUOTE_${1}_DQUOTE_"
+  - key: '\\"'
+    value: "__TEMP_QUOTE__"
+  - key: "\'"
+    value: "'"
+  - key: "_DQUOTE_"
+    value: '"'
+  - key: "__TEMP_QUOTE__"
+    value: '"'
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+
 template:
+  chat: |
+    {{.Input -}}
+    <|im_start|>assistant
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
     {{- if .FunctionCall }}
@@ -22,38 +76,25 @@ template:
     {{- else if eq .RoleName "tool" }}
     </tool_response>
     {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
+  completion: |
+    {{.Input}}
+  function: |-
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    You are a function calling AI model.
+    Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
     </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    You should call the tools provided to you sequentially
+    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+    <scratchpad>
+    {step-by-step reasoning and plan in bullet points}
+    </scratchpad>
+    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
     <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
+    {"arguments": <args-dict>, "name": <function-name>}
     </tool_call><|im_end|>
     {{.Input -}}
-    <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- "\n</tool_call>"
-- "\n\n\n"
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
+    <|im_start|>assistant
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
@@ -4,7 +4,61 @@ f16: false
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
+stopwords:
+- "<|im_end|>"
+- "<dummy32000>"
+- "</tool_call>"
+- "<|eot_id|>"
+- "<|end_of_text|>"
+
+function:
+  # disable injecting the "answer" tool
+  disable_no_action: true
+
+  grammar:
+    # This allows the grammar to also return messages
+    mixed_mode: true
+    # Suffix to add to the grammar
+    #prefix: '<tool_call>\n'
+    # Force parallel calls in the grammar
+    # parallel_calls: true
+
+  return_name_in_function_response: true
+  # Without grammar uncomment the lines below
+  # Warning: this is relying only on the capability of the
+  # LLM model to generate the correct function call.
+  json_regex_match: 
+   - "(?s)<tool_call>(.*?)</tool_call>"
+   - "(?s)<tool_call>(.*?)"
+  replace_llm_results:
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+  replace_function_results: 
+  # Replace everything that is not JSON array or object
+  # 
+  - key: '(?s)^[^{\[]*'
+    value: ""
+  - key: '(?s)[^}\]]*$'
+    value: ""
+  - key: "'([^']*?)'"
+    value: "_DQUOTE_${1}_DQUOTE_"
+  - key: '\\"'
+    value: "__TEMP_QUOTE__"
+  - key: "\'"
+    value: "'"
+  - key: "_DQUOTE_"
+    value: '"'
+  - key: "__TEMP_QUOTE__"
+    value: '"'
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+
 template:
+  chat: |
+    {{.Input -}}
+    <|im_start|>assistant
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
     {{- if .FunctionCall }}
@@ -23,37 +77,25 @@ template:
     {{- else if eq .RoleName "tool" }}
     </tool_response>
     {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
+  completion: |
+    {{.Input}}
+  function: |-
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    You are a function calling AI model.
+    Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
     </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    You should call the tools provided to you sequentially
+    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+    <scratchpad>
+    {step-by-step reasoning and plan in bullet points}
+    </scratchpad>
+    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
     <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
+    {"arguments": <args-dict>, "name": <function-name>}
     </tool_call><|im_end|>
     {{.Input -}}
     <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-stopwords:
-- <|im_end|>
-- "\n</tool_call>"
-- <dummy32000>
-- "\n\n\n"
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
@@ -67,6 +67,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			return true
 		})
 
+		result = functions.CleanupLLMResult(result, config.FunctionsConfig)
 		results := functions.ParseFunctionCall(result, config.FunctionsConfig)
 		noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
 
@@ -192,7 +193,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		}
 
 		switch {
-		case !config.FunctionsConfig.NoGrammar && shouldUseFn:
+		case !config.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn:
 			noActionGrammar := functions.Function{
 				Name:        noActionName,
 				Description: noActionDescription,
@@ -219,15 +220,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			// Handle if we should return "name" instead of "functions"
 			if config.FunctionsConfig.FunctionName {
 				jsStruct := funcs.ToJSONNameStructure()
-				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
+				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 			} else {
 				jsStruct := funcs.ToJSONFunctionStructure()
-				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
+				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 			}
 		case input.JSONFunctionGrammarObject != nil:
-			config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
+			config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 		case input.JSONFunctionGrammarObjectName != nil:
-			config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
+			config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 		default:
 			// Force picking one of the functions by the request
 			if config.FunctionToCall() != "" {
@@ -470,6 +471,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 					return
 				}
 
+				s = functions.CleanupLLMResult(s, config.FunctionsConfig)
 				results := functions.ParseFunctionCall(s, config.FunctionsConfig)
 				noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0