diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 902b9683fff..ec1f3753ead 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -3,7 +3,61 @@ mmap: true
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
+stopwords:
+- "<|im_end|>"
+- "<dummy32000>"
+- "</tool_call>"
+- "<|eot_id|>"
+- "<|end_of_text|>"
+
+function:
+  # disable injecting the "answer" tool
+  disable_no_action: true
+
+  grammar:
+    # This allows the grammar to also return messages
+    mixed_mode: true
+    # Prefix to add to the grammar
+    # prefix: '<tool_call>\n'
+    # Force parallel calls in the grammar
+    # parallel_calls: true
+
+  return_name_in_function_response: true
+  # Patterns capturing the tool call payload between <tool_call> tags; the
+  # second one covers a missing closing tag (it is listed as a stopword above).
+  # Warning: this relies only on the LLM generating a correct function call.
+  json_regex_match:
+  - "(?s)<tool_call>(.*?)</tool_call>"
+  - "(?s)<tool_call>(.*)"
+  replace_llm_results:
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+  replace_function_results:
+  # Replace everything that is not JSON array or object,
+  # then turn single-quoted strings into double-quoted ones
+  - key: '(?s)^[^{\[]*'
+    value: ""
+  - key: '(?s)[^}\]]*$'
+    value: ""
+  - key: "'([^']*?)'"
+    value: "_DQUOTE_${1}_DQUOTE_"
+  - key: '\\"'
+    value: "__TEMP_QUOTE__"
+  - key: "\\\\'"  # regex \\' : a backslash followed by a single quote
+    value: "'"
+  - key: "_DQUOTE_"
+    value: '"'
+  - key: "__TEMP_QUOTE__"
+    value: '"'
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+
 template:
+  chat: |
+    {{.Input -}}
+    <|im_start|>assistant
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
     {{- if .FunctionCall }}
@@ -22,38 +76,25 @@ template:
     {{- else if eq .RoleName "tool" }}
     </tool_response>
     {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
+  completion: |
+    {{.Input}}
+  function: |-
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    You are a function calling AI model.
+    Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
     </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    You should call the tools provided to you sequentially
+    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+    <scratchpad>
+    {step-by-step reasoning and plan in bullet points}
+    </scratchpad>
+    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
     <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
+    {"arguments": <args-dict>, "name": <function-name>}
     </tool_call><|im_end|>
     {{.Input -}}
     <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- "\n</tool_call>"
-- "\n\n\n"
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 902b9683fff..a8904f93dc1 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -3,7 +3,61 @@ mmap: true
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
+stopwords:
+- "<|im_end|>"
+- "<dummy32000>"
+- "</tool_call>"
+- "<|eot_id|>"
+- "<|end_of_text|>"
+
+function:
+  # disable injecting the "answer" tool
+  disable_no_action: true
+
+  grammar:
+    # This allows the grammar to also return messages
+    mixed_mode: true
+    # Prefix to add to the grammar
+    # prefix: '<tool_call>\n'
+    # Force parallel calls in the grammar
+    # parallel_calls: true
+
+  return_name_in_function_response: true
+  # Patterns capturing the tool call payload between <tool_call> tags; the
+  # second one covers a missing closing tag (it is listed as a stopword above).
+  # Warning: this relies only on the LLM generating a correct function call.
+  json_regex_match:
+  - "(?s)<tool_call>(.*?)</tool_call>"
+  - "(?s)<tool_call>(.*)"
+  replace_llm_results:
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+  replace_function_results:
+  # Replace everything that is not JSON array or object,
+  # then turn single-quoted strings into double-quoted ones
+  - key: '(?s)^[^{\[]*'
+    value: ""
+  - key: '(?s)[^}\]]*$'
+    value: ""
+  - key: "'([^']*?)'"
+    value: "_DQUOTE_${1}_DQUOTE_"
+  - key: '\\"'
+    value: "__TEMP_QUOTE__"
+  - key: "\\\\'"  # regex \\' : a backslash followed by a single quote
+    value: "'"
+  - key: "_DQUOTE_"
+    value: '"'
+  - key: "__TEMP_QUOTE__"
+    value: '"'
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+
 template:
+  chat: |
+    {{.Input -}}
+    <|im_start|>assistant
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
     {{- if .FunctionCall }}
@@ -22,38 +76,25 @@ template:
     {{- else if eq .RoleName "tool" }}
     </tool_response>
     {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
+  completion: |
+    {{.Input}}
+  function: |-
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    You are a function calling AI model.
+    Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
     </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    You should call the tools provided to you sequentially
+    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+    <scratchpad>
+    {step-by-step reasoning and plan in bullet points}
+    </scratchpad>
+    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
     <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
+    {"arguments": <args-dict>, "name": <function-name>}
     </tool_call><|im_end|>
     {{.Input -}}
-    <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- "\n</tool_call>"
-- "\n\n\n"
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
+    <|im_start|>assistant
\ No newline at end of file
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index bc11d4d7f8d..69693ec03d6 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -4,7 +4,61 @@ f16: false
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
+stopwords:
+- "<|im_end|>"
+- "<dummy32000>"
+- "</tool_call>"
+- "<|eot_id|>"
+- "<|end_of_text|>"
+
+function:
+  # disable injecting the "answer" tool
+  disable_no_action: true
+
+  grammar:
+    # This allows the grammar to also return messages
+    mixed_mode: true
+    # Prefix to add to the grammar
+    # prefix: '<tool_call>\n'
+    # Force parallel calls in the grammar
+    # parallel_calls: true
+
+  return_name_in_function_response: true
+  # Patterns capturing the tool call payload between <tool_call> tags; the
+  # second one covers a missing closing tag (it is listed as a stopword above).
+  # Warning: this relies only on the LLM generating a correct function call.
+  json_regex_match:
+  - "(?s)<tool_call>(.*?)</tool_call>"
+  - "(?s)<tool_call>(.*)"
+  replace_llm_results:
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+  replace_function_results:
+  # Replace everything that is not JSON array or object,
+  # then turn single-quoted strings into double-quoted ones
+  - key: '(?s)^[^{\[]*'
+    value: ""
+  - key: '(?s)[^}\]]*$'
+    value: ""
+  - key: "'([^']*?)'"
+    value: "_DQUOTE_${1}_DQUOTE_"
+  - key: '\\"'
+    value: "__TEMP_QUOTE__"
+  - key: "\\\\'"  # regex \\' : a backslash followed by a single quote
+    value: "'"
+  - key: "_DQUOTE_"
+    value: '"'
+  - key: "__TEMP_QUOTE__"
+    value: '"'
+  # Drop the scratchpad content from responses
+  - key: "(?s)<scratchpad>.*</scratchpad>"
+    value: ""
+
 template:
+  chat: |
+    {{.Input -}}
+    <|im_start|>assistant
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
     {{- if .FunctionCall }}
@@ -23,37 +77,25 @@ template:
     {{- else if eq .RoleName "tool" }}
     </tool_response>
     {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
+  completion: |
+    {{.Input}}
+  function: |-
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    You are a function calling AI model.
+    Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
     </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    You should call the tools provided to you sequentially
+    Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+    <scratchpad>
+    {step-by-step reasoning and plan in bullet points}
+    </scratchpad>
+    For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
     <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
+    {"arguments": <args-dict>, "name": <function-name>}
     </tool_call><|im_end|>
     {{.Input -}}
     <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-stopwords:
-- <|im_end|>
-- "\n</tool_call>"
-- <dummy32000>
-- "\n\n\n"
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index a82bc925bc8..341dc34bec0 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -67,6 +67,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			return true
 		})
 
+		result = functions.CleanupLLMResult(result, config.FunctionsConfig)
 		results := functions.ParseFunctionCall(result, config.FunctionsConfig)
 		noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
 
@@ -192,7 +193,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		}
 
 		switch {
-		case !config.FunctionsConfig.NoGrammar && shouldUseFn:
+		case !config.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn:
 			noActionGrammar := functions.Function{
 				Name:        noActionName,
 				Description: noActionDescription,
@@ -219,15 +220,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			// Handle if we should return "name" instead of "functions"
 			if config.FunctionsConfig.FunctionName {
 				jsStruct := funcs.ToJSONNameStructure()
-				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
+				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 			} else {
 				jsStruct := funcs.ToJSONFunctionStructure()
-				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
+				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 			}
 		case input.JSONFunctionGrammarObject != nil:
-			config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
+			config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 		case input.JSONFunctionGrammarObjectName != nil:
-			config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
+			config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 		default:
 			// Force picking one of the functions by the request
 			if config.FunctionToCall() != "" {
@@ -470,6 +471,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 					return
 				}
 
+				s = functions.CleanupLLMResult(s, config.FunctionsConfig)
 				results := functions.ParseFunctionCall(s, config.FunctionsConfig)
 				noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
 
diff --git a/gallery/hermes-2-pro-mistral.yaml b/gallery/hermes-2-pro-mistral.yaml
index 3c15b44148f..195ebbc99b5 100644
--- a/gallery/hermes-2-pro-mistral.yaml
+++ b/gallery/hermes-2-pro-mistral.yaml
@@ -3,7 +3,55 @@ name: "hermes-2-pro-mistral"
 
 config_file: |
   mmap: true
+  stopwords:
+  - "<|im_end|>"
+  - "<dummy32000>"
+  - "</tool_call>"
+  - "<|eot_id|>"
+  - "<|end_of_text|>"
+  function:
+    # disable injecting the "answer" tool
+    disable_no_action: true
+
+    grammar:
+      # This allows the grammar to also return messages
+      mixed_mode: true
+
+    return_name_in_function_response: true
+    # Patterns capturing the tool call payload between <tool_call> tags; the
+    # second one covers a missing closing tag (it is listed as a stopword above).
+    # Warning: this relies only on the LLM generating a correct function call.
+    json_regex_match:
+    - "(?s)<tool_call>(.*?)</tool_call>"
+    - "(?s)<tool_call>(.*)"
+    replace_llm_results:
+    # Drop the scratchpad content from responses
+    - key: "(?s)<scratchpad>.*</scratchpad>"
+      value: ""
+    replace_function_results:
+    # Strip everything around the JSON array or object, then normalise quotes
+    - key: '(?s)^[^{\[]*'
+      value: ""
+    - key: '(?s)[^}\]]*$'
+      value: ""
+    - key: "'([^']*?)'"
+      value: "_DQUOTE_${1}_DQUOTE_"
+    - key: '\\"'
+      value: "__TEMP_QUOTE__"
+    - key: "\\\\'"  # regex \\' : a backslash followed by a single quote
+      value: "'"
+    - key: "_DQUOTE_"
+      value: '"'
+    - key: "__TEMP_QUOTE__"
+      value: '"'
+    # Drop the scratchpad content from responses
+    - key: "(?s)<scratchpad>.*</scratchpad>"
+      value: ""
+
   template:
+    chat: |
+      {{.Input -}}
+      <|im_start|>assistant
     chat_message: |
       <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
       {{- if .FunctionCall }}
@@ -22,31 +70,25 @@ config_file: |
       {{- else if eq .RoleName "tool" }}
       </tool_response>
       {{- end }}<|im_end|>
-    # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-    function: |
+    completion: |
+      {{.Input}}
+    function: |-
       <|im_start|>system
-      You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+      You are a function calling AI model.
+      Here are the available tools:
       <tools>
       {{range .Functions}}
       {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
       {{end}}
       </tools>
-      Use the following pydantic model json schema for each tool call you will make:
-      {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-      For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+      You should call the tools provided to you sequentially
+      Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+      <scratchpad>
+      {step-by-step reasoning and plan in bullet points}
+      </scratchpad>
+      For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
       <tool_call>
-      {'arguments': <args-dict>, 'name': <function-name>}
+      {"arguments": <args-dict>, "name": <function-name>}
       </tool_call><|im_end|>
       {{.Input -}}
       <|im_start|>assistant
-      <tool_call>
-    chat: |
-      {{.Input -}}
-      <|im_start|>assistant
-    completion: |
-      {{.Input}}
-  context_size: 4096
-  f16: true
-  stopwords:
-  - <|im_end|>
-  - '</tool_call>'
diff --git a/pkg/functions/grammar_json_schema.go b/pkg/functions/grammar_json_schema.go
index 6f056b53bac..30c1901fad5 100644
--- a/pkg/functions/grammar_json_schema.go
+++ b/pkg/functions/grammar_json_schema.go
@@ -117,7 +117,16 @@ const array = `arr  ::=
     (",\n"  realvalue)*
   )? "]"`
 
-func (sc *JSONSchemaConverter) finalizeGrammar(suffix string, maybeArray, maybeString bool) string {
+func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {
+
+	grammarOpts := &GrammarOption{}
+	grammarOpts.Apply(options...)
+
+	suffix := grammarOpts.Suffix
+	maybeArray := grammarOpts.MaybeArray
+	maybeString := grammarOpts.MaybeString
+	noMixedFreeString := grammarOpts.NoMixedFreeString
+
 	var lines []string
 
 	swapRoot := maybeArray || maybeString || suffix != ""
@@ -140,6 +149,11 @@ func (sc *JSONSchemaConverter) finalizeGrammar(suffix string, maybeArray, maybeS
 		newRoot = "arr | realvalue"
 	}
 
+	freestringRule := "mixedstring"
+	if noMixedFreeString {
+		freestringRule = "freestring"
+	}
+
 	if suffix != "" {
 		// quote newlines in suffix
 		suffix = utils.EscapeNewLines(suffix)
@@ -150,7 +164,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(suffix string, maybeArray, maybeS
 
 		if maybeString {
 			//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
-			newRoot = "( \"" + suffix + "\" " + newRoot + " | freestring ) "
+			newRoot = "( \"" + suffix + "\" " + newRoot + " | " + freestringRule + " ) "
 		} else {
 			newRoot = "\"" + suffix + "\" " + "" + newRoot + ""
 		}
@@ -159,12 +173,18 @@ func (sc *JSONSchemaConverter) finalizeGrammar(suffix string, maybeArray, maybeS
 			//	newRoot = "(" + newRoot + ")"
 		}
 
-		newRoot = "freestring | " + newRoot
+		newRoot = freestringRule + " | " + newRoot
 	}
 
 	lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
 	lines = append(lines, array)
 
+	if maybeArray {
+		lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
+	} else {
+		lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
+	}
+
 	return strings.Join(lines, "\n")
 }
 
@@ -289,16 +309,16 @@ func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[strin
 
 	return def
 }
-func (sc *JSONSchemaConverter) Grammar(suffix string, schema map[string]interface{}, maybeArray, maybeString bool) string {
+func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) string {
 	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
 	sc.visit(schema, "", schema)
-	return sc.finalizeGrammar(suffix, maybeArray, maybeString)
+	return sc.finalizeGrammar(options...)
 }
 
-func (sc *JSONSchemaConverter) GrammarFromBytes(suffix string, b []byte, maybeArray, maybeString bool) string {
+func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) string {
 	var schema map[string]interface{}
 	_ = json.Unmarshal(b, &schema)
-	return sc.Grammar(suffix, schema, maybeArray, maybeString)
+	return sc.Grammar(schema, options...)
 }
 
 func jsonString(v interface{}) string {
@@ -341,9 +361,12 @@ type JSONFunctionStructureName struct {
 	Defs  map[string]interface{} `json:"$defs,omitempty"`
 }
 
-func (j JSONFunctionStructureName) Grammar(suffix string, propOrder string, maybeArray, maybeString bool) string {
+func (j JSONFunctionStructureName) Grammar(options ...func(*GrammarOption)) string {
+	grammarOpts := &GrammarOption{}
+	grammarOpts.Apply(options...)
+
 	dat, _ := json.Marshal(j)
-	return NewJSONSchemaConverter(propOrder).GrammarFromBytes(suffix, dat, maybeArray, maybeString)
+	return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
 }
 
 type JSONFunctionStructureFunction struct {
@@ -352,7 +375,10 @@ type JSONFunctionStructureFunction struct {
 	Defs  map[string]interface{} `json:"$defs,omitempty"`
 }
 
-func (j JSONFunctionStructureFunction) Grammar(suffix string, propOrder string, maybeArray, maybeString bool) string {
+func (j JSONFunctionStructureFunction) Grammar(options ...func(*GrammarOption)) string {
+	grammarOpts := &GrammarOption{}
+	grammarOpts.Apply(options...)
+
 	dat, _ := json.Marshal(j)
-	return NewJSONSchemaConverter(propOrder).GrammarFromBytes(suffix, dat, maybeArray, maybeString)
+	return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
 }
diff --git a/pkg/functions/grammar_json_schema_test.go b/pkg/functions/grammar_json_schema_test.go
index 1a578cc4388..3a864488bf0 100644
--- a/pkg/functions/grammar_json_schema_test.go
+++ b/pkg/functions/grammar_json_schema_test.go
@@ -3,6 +3,7 @@ package functions_test
 import (
 	"strings"
 
+	"github.com/go-skynet/LocalAI/pkg/functions"
 	. "github.com/go-skynet/LocalAI/pkg/functions"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -248,7 +249,7 @@ root-1-name ::= "\"search\""`
 var _ = Describe("JSON schema grammar tests", func() {
 	Context("JSON", func() {
 		It("generates a valid grammar from JSON schema", func() {
-			grammar := NewJSONSchemaConverter("").GrammarFromBytes("", []byte(testInput1), false, false)
+			grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
 			results := strings.Split(inputResult1, "\n")
 			for _, r := range results {
 				if r != "" {
@@ -258,7 +259,7 @@ var _ = Describe("JSON schema grammar tests", func() {
 			Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
 		})
 		It("generates a valid grammar from JSON schema", func() {
-			grammar := NewJSONSchemaConverter("").GrammarFromBytes("", []byte(testInput2), false, false)
+			grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
 			results := strings.Split(inputResult3, "\n")
 			for _, r := range results {
 				if r != "" {
@@ -272,7 +273,7 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructureFunction{
 				OneOf: testFunctions}
 
-			grammar := structuredGrammar.Grammar("", "", false, false)
+			grammar := structuredGrammar.Grammar()
 			results := strings.Split(inputResult1, "\n")
 			for _, r := range results {
 				if r != "" {
@@ -286,8 +287,12 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructureFunction{
 				OneOf: testFunctions}
 
-			grammar := structuredGrammar.Grammar("", "", true, false)
-			results := strings.Split(inputResult2, "\n")
+			grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
+			results := strings.Split(
+				strings.Join([]string{
+					inputResult2,
+					"mixedstring ::= freestring | freestring arr | freestring realvalue"}, "\n"),
+				"\n")
 			for _, r := range results {
 				if r != "" {
 					Expect(grammar).To(ContainSubstring(r))
@@ -300,8 +305,12 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructureName{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar("", "", true, false)
-			results := strings.Split(inputResult4, "\n")
+			grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
+			results := strings.Split(
+				strings.Join([]string{
+					inputResult4,
+					"mixedstring ::= freestring | freestring arr | freestring realvalue"}, "\n"),
+				"\n")
 			for _, r := range results {
 				if r != "" {
 					Expect(grammar).To(ContainSubstring(r))
@@ -314,8 +323,15 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructureName{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar("suffix", "", true, false)
-			results := strings.Split(rootResult(`"suffix" arr | realvalue`), "\n")
+			grammar := structuredGrammar.Grammar(
+				functions.SetPrefix("suffix"),
+				functions.EnableMaybeArray,
+			)
+			results := strings.Split(
+				strings.Join([]string{
+					rootResult(`"suffix" arr | realvalue`),
+					"mixedstring ::= freestring | freestring arr | freestring realvalue"}, "\n"),
+				"\n")
 			for _, r := range results {
 				if r != "" {
 					Expect(grammar).To(ContainSubstring(r))
@@ -327,8 +343,12 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructureName{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar("suffix", "", false, false)
-			results := strings.Split(rootResult(`"suffix" realvalue`), "\n")
+			grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"))
+			results := strings.Split(
+				strings.Join([]string{
+					rootResult(`"suffix" realvalue`),
+					"mixedstring ::= freestring | freestring realvalue"}, "\n"),
+				"\n")
 			for _, r := range results {
 				if r != "" {
 					Expect(grammar).To(ContainSubstring(r))
@@ -340,8 +360,12 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructureName{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar("suffix", "", false, true)
-			results := strings.Split(rootResult(`( "suffix" realvalue | freestring )`), "\n")
+			grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString)
+			results := strings.Split(
+				strings.Join([]string{
+					rootResult(`( "suffix" realvalue | mixedstring )`),
+					"mixedstring ::= freestring | freestring realvalue"}, "\n"),
+				"\n")
 			for _, r := range results {
 				if r != "" {
 					Expect(grammar).To(ContainSubstring(r))
@@ -353,8 +377,13 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructureName{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar("suffix", "", true, true)
-			results := strings.Split(rootResult(`( "suffix" (arr | realvalue) | freestring )`), "\n")
+			grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString, functions.EnableMaybeArray)
+			results := strings.Split(
+				strings.Join([]string{
+					rootResult(`( "suffix" (arr | realvalue) | mixedstring )`),
+					"mixedstring ::= freestring | freestring arr | freestring realvalue"}, "\n"),
+				"\n")
+
 			for _, r := range results {
 				if r != "" {
 					Expect(grammar).To(ContainSubstring(r))
@@ -367,8 +396,30 @@ var _ = Describe("JSON schema grammar tests", func() {
 			structuredGrammar := JSONFunctionStructureName{
 				OneOf: testFunctionsName}
 
-			grammar := structuredGrammar.Grammar("", "", true, true)
-			results := strings.Split(rootResult(`freestring | arr | realvalue`), "\n")
+			grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray)
+			results := strings.Split(
+				strings.Join([]string{
+					rootResult(`mixedstring | arr | realvalue`),
+					"mixedstring ::= freestring | freestring arr | freestring realvalue"}, "\n"),
+				"\n")
+			for _, r := range results {
+				if r != "" {
+					Expect(grammar).To(ContainSubstring(r))
+				}
+			}
+			Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))), grammar)
+		})
+
+		It("generates a valid grammar from JSON Objects without a suffix that could return text or an array of tools or just string. Disables mixedstring", func() {
+			structuredGrammar := JSONFunctionStructureName{
+				OneOf: testFunctionsName}
+
+			grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.NoMixedFreeString)
+			results := strings.Split(
+				strings.Join([]string{
+					rootResult(`freestring | arr | realvalue`),
+					"mixedstring ::= freestring | freestring arr | freestring realvalue"}, "\n"),
+				"\n")
 			for _, r := range results {
 				if r != "" {
 					Expect(grammar).To(ContainSubstring(r))
diff --git a/pkg/functions/options.go b/pkg/functions/options.go
new file mode 100644
index 00000000000..10bbe314710
--- /dev/null
+++ b/pkg/functions/options.go
@@ -0,0 +1,33 @@
+package functions
+
+type GrammarOption struct {
+	PropOrder         string
+	Suffix            string
+	MaybeArray        bool
+	MaybeString       bool
+	NoMixedFreeString bool
+}
+
+func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
+	for _, l := range options {
+		l(o)
+	}
+}
+
+var EnableMaybeArray = func(o *GrammarOption) {
+	o.MaybeArray = true
+}
+
+var EnableMaybeString = func(o *GrammarOption) {
+	o.MaybeString = true
+}
+
+var NoMixedFreeString func(*GrammarOption) = func(o *GrammarOption) {
+	o.NoMixedFreeString = true
+}
+
+func SetPrefix(suffix string) func(*GrammarOption) {
+	return func(o *GrammarOption) {
+		o.Suffix = suffix
+	}
+}
diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index b551a40ed97..735263c746c 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -8,6 +8,26 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+type GrammarConfig struct {
+	// ParallelCalls enables the LLM to return multiple function calls in the same response
+	ParallelCalls bool `yaml:"parallel_calls"`
+
+	// MixedMode enables the LLM to return strings and not only JSON objects
+	// This is useful for models that should not be constrained to returning only JSON and can also send messages back to the user
+	MixedMode bool `yaml:"mixed_mode"`
+
+	// NoMixedFreeString disables the mixed mode for free strings
+	// In this way, if the LLM selects a free string, it won't necessarily be mixed with JSON objects
+	NoMixedFreeString bool `yaml:"no_mixed_free_string"`
+
+	// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
+	NoGrammar bool `yaml:"disable"`
+
+	// Prefix is the prefix to prepend to the grammar when being generated
+	// This is useful when models prepend a tag before returning JSON
+	Prefix string `yaml:"prefix"`
+}
+
 // FunctionsConfig is the configuration for the tool/function call.
 // It includes setting to map the function name and arguments from the response
 // and, for instance, also if processing the requests with BNF grammars.
@@ -16,34 +36,26 @@ type FunctionsConfig struct {
 	// By default we inject a tool that does nothing and is used to return an answer from the LLM
 	DisableNoAction bool `yaml:"disable_no_action"`
 
+	// Grammar is the configuration for the grammar
+	GrammarConfig GrammarConfig `yaml:"grammar"`
+
 	// NoActionFunctionName is the name of the function that does nothing. It defaults to "answer"
 	NoActionFunctionName string `yaml:"no_action_function_name"`
 
 	// NoActionDescriptionName is the name of the function that returns the description of the no action function
 	NoActionDescriptionName string `yaml:"no_action_description_name"`
 
-	// ParallelCalls enables the LLM to return multiple function calls in the same response
-	ParallelCalls bool `yaml:"parallel_calls"`
-
-	// GrammarMessage enables the LLM to return strings and not only JSON objects
-	// This is useful for models to not constraing returning only JSON and also messages back to the user
-	GrammarMessage bool `yaml:"grammar_message"`
-
-	// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
-	NoGrammar bool `yaml:"no_grammar"`
-
 	// ResponseRegex is a named regex to extract the function name and arguments from the response
 	ResponseRegex string `yaml:"response_regex"`
 
 	// JSONRegexMatch is a regex to extract the JSON object from the response
 	JSONRegexMatch []string `yaml:"json_regex_match"`
 
-	// GrammarPrefix is the suffix to append to the grammar when being generated
-	// This is useful when models prepend a tag before returning JSON
-	GrammarPrefix string `yaml:"grammar_prefix"`
+	// ReplaceFunctionResults allows replacing strings in the LLM result before it is parsed for function calls
+	ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"`
 
-	// ReplaceResults allow to replace strings in the results before parsing them
-	ReplaceResults []ReplaceResult `yaml:"replace_results"`
+	// ReplaceLLMResult allows replacing strings in the raw LLM result (applied by CleanupLLMResult)
+	ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"`
 
 	// FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
 	// instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }.
@@ -61,18 +73,49 @@ type FuncCallResults struct {
 	Arguments string
 }
 
-func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults {
+func (g GrammarConfig) Options() []func(o *GrammarOption) {
+	opts := []func(o *GrammarOption){}
+	if g.MixedMode {
+		opts = append(opts, EnableMaybeString)
+	}
+	if g.ParallelCalls {
+		opts = append(opts, EnableMaybeArray)
+	}
+	if g.Prefix != "" {
+		opts = append(opts, SetPrefix(g.Prefix))
+	}
+	if g.NoMixedFreeString {
+		opts = append(opts, NoMixedFreeString)
+	}
+	return opts
+}
+
+func CleanupLLMResult(llmresult string, functionConfig FunctionsConfig) string {
 	log.Debug().Msgf("LLM result: %s", llmresult)
 
-	for _, item := range functionConfig.ReplaceResults {
+	for _, item := range functionConfig.ReplaceLLMResult {
 		k, v := item.Key, item.Value
 		log.Debug().Msgf("Replacing %s with %s", k, v)
 		re := regexp.MustCompile(k)
 		llmresult = re.ReplaceAllString(llmresult, v)
 	}
-
 	log.Debug().Msgf("LLM result(processed): %s", llmresult)
 
+	return llmresult
+}
+
+func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults {
+
+	log.Debug().Msgf("LLM result: %s", llmresult)
+
+	for _, item := range functionConfig.ReplaceFunctionResults {
+		k, v := item.Key, item.Value
+		log.Debug().Msgf("Replacing %s with %s", k, v)
+		re := regexp.MustCompile(k)
+		llmresult = re.ReplaceAllString(llmresult, v)
+	}
+	log.Debug().Msgf("LLM result(function cleanup): %s", llmresult)
+
 	functionNameKey := "function"
 	if functionConfig.FunctionName {
 		functionNameKey = "name"
@@ -127,7 +170,6 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
 
 	// the response is a string that we have to parse
 	result := make(map[string]string)
-
 	if len(functionConfig.JSONRegexMatch) != 0 {
 		for _, r := range functionConfig.JSONRegexMatch {
 			// We use a regex to extract the JSON object from the response
diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go
index 2485d70b318..5e266c50f27 100644
--- a/pkg/functions/parse_test.go
+++ b/pkg/functions/parse_test.go
@@ -120,7 +120,7 @@ Some text before the JSON
 Some text after the JSON
 `
 
-			functionConfig.ReplaceResults = []ReplaceResult{
+			functionConfig.ReplaceFunctionResults = []ReplaceResult{
 				{Key: `(?s)^[^{\[]*`, Value: ""},
 				{Key: `(?s)[^}\]]*$`, Value: ""},
 			}
@@ -137,7 +137,7 @@ Some text before the JSON
 [{"function": "add", "arguments": {"x": 5, "y": 3}}, {"function": "subtract", "arguments": {"x": 10, "y": 7}}]
 Some text after the JSON
 `
-			functionConfig.ReplaceResults = []ReplaceResult{
+			functionConfig.ReplaceFunctionResults = []ReplaceResult{
 				{Key: `(?s)^[^{\[]*`, Value: ""},
 				{Key: `(?s)[^}\]]*$`, Value: ""},
 			}
@@ -163,7 +163,7 @@ Some text after the JSON
 			// Regex to match non-JSON characters after the JSON structure
 			//reAfter := regexp.MustCompile(`(?s)(?<=\}|\]).*$`)
 
-			functionConfig.ReplaceResults = []ReplaceResult{
+			functionConfig.ReplaceFunctionResults = []ReplaceResult{
 				{Key: `(?s)^[^{\[]*`, Value: ""},
 				{Key: `(?s)[^}\]]*$`, Value: ""},
 				// Regex pattern to match single quotes around keys and values
@@ -196,7 +196,7 @@ Some text after the JSON
 			// Regex to match non-JSON characters after the JSON structure
 			//reAfter := regexp.MustCompile(`(?s)(?<=\}|\]).*$`)
 
-			functionConfig.ReplaceResults = []ReplaceResult{
+			functionConfig.ReplaceFunctionResults = []ReplaceResult{
 				{Key: `(?s)^[^{\[]*`, Value: ""},
 				{Key: `(?s)[^}\]]*$`, Value: ""},
 				// Regex pattern to match single quotes around keys and values