From 8a91a6d652618deab95ffb8cb3813a4f99b0bf61 Mon Sep 17 00:00:00 2001
From: Andrew Hyatt
Date: Thu, 22 Aug 2024 01:01:19 -0400
Subject: [PATCH] Fix function calling conversations for Claude (#68)

This fixes an issue with us sending Claude something not supported when having
function-call conversations, and also gives the client a way to handle
conversations with functions that respects differences in how the LLMs expect
to be called.

Also makes the integration tests cleaner via macros.

Fixes https://github.com/ahyatt/llm/issues/62
---
 NEWS.org                |   3 +-
 README.org              |  12 ++--
 llm-claude.el           |  46 ++++++++++-----
 llm-integration-test.el | 125 ++++++++++++++++++++--------------------
 llm-provider-utils.el   |  13 +++--
 llm-tester.el           |   3 -
 llm.el                  |   3 -
 7 files changed, 112 insertions(+), 93 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index 9771a81..274523e 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -1,9 +1,10 @@
 * Version 0.17.2
 - Fix compiled functions not being evaluated in =llm-prompt=.
 - Use Ollama's new =embed= API instead of the obsolete one.
+- Fix Claude function calling conversations.
 - Fix issue in Open AI streaming function calling.
 - Update Open AI and Claude default chat models to the later models.
 * Version 0.17.1
 - Support Ollama function calling, for models which support it.
 - Make sure every model, even unknown models, return some value for ~llm-chat-token-limit~.
 - Add token count for llama3.1 model.
diff --git a/README.org b/README.org
index 82d14ae..ee89558 100644
--- a/README.org
+++ b/README.org
@@ -160,10 +160,14 @@ with the arguments supplied by the LLM.
 Instead of returning (or passing to a callback) a string, instead an alist will
 be returned of function names and return values.
 
-The client must then send this back to the LLM, to get a textual response from
-the LLM based on the results of the function call. These have already been added
-to the prompt, so the client only has to call the LLM again. Gemini and Vertex
-require this extra call to the LLM, but Open AI does not.
+After the function call is made, the client can use the result directly, but
+if you want to continue the conversation, or get a textual response that
+accompanies the function call, you should just send the prompt back with no
+modifications. This is because the LLM returns the function call to make as
+its response, and then expects to receive the results of that call. The
+functions were already executed at the end of the previous call, and their
+results were stored in the prompt, which is why the prompt should be sent
+back without further modifications.
 
 Be aware that there is no gaurantee that the function will be called correctly.
 While the LLMs mostly get this right, they are trained on Javascript functions,
diff --git a/llm-claude.el b/llm-claude.el
index fc93455..d95d6c3 100644
--- a/llm-claude.el
+++ b/llm-claude.el
@@ -57,16 +57,19 @@
          ("max_tokens" . ,(or (llm-chat-prompt-max-tokens prompt) 4096))
          ("messages" . ,(mapcar
                          (lambda (interaction)
-                          (append
-                           `(("role" . ,(pcase (llm-chat-prompt-interaction-role interaction)
-                                          ('function 'user)
-                                          ('assistant 'assistant)
-                                          ('user 'user)))
-                             ("content" . ,(or (llm-chat-prompt-interaction-content interaction)
-                                               (llm-chat-prompt-function-call-result-result
-                                                (llm-chat-prompt-interaction-function-call-result interaction)))))
-                           (when-let ((r (llm-chat-prompt-interaction-function-call-result interaction)))
-                             `(("tool_use_id" . ,(llm-chat-prompt-function-call-result-call-id r))))))
+                          `(("role" . ,(pcase (llm-chat-prompt-interaction-role interaction)
+                                         ('function 'user)
+                                         ('assistant 'assistant)
+                                         ('user 'user)))
+                            ("content" .
+                             ,(if (llm-chat-prompt-interaction-function-call-result interaction)
+                                  `((("type" . "tool_result")
+                                     ("tool_use_id" .
+                                      ,(llm-chat-prompt-function-call-result-call-id
+                                        (llm-chat-prompt-interaction-function-call-result interaction)))
+                                     ("content" .
+                                      ,(llm-chat-prompt-interaction-content interaction))))
+                                (llm-chat-prompt-interaction-content interaction)))))
                         (llm-chat-prompt-interactions prompt)))))
         (system (llm-provider-utils-get-system-prompt prompt)))
     (when (llm-chat-prompt-functions prompt)
@@ -87,10 +90,15 @@
                     :name (assoc-default 'name item)
                     :args (assoc-default 'input item)))))
 
-(cl-defmethod llm-provider-populate-function-calls ((_ llm-claude) _ _)
-  ;; Claude does not need to be sent back the function calls it sent in the
-  ;; first place.
-  nil)
+(cl-defmethod llm-provider-populate-function-calls ((_ llm-claude) prompt calls)
+  (llm-provider-utils-append-to-prompt
+   prompt
+   (mapcar (lambda (call)
+             `((type . "tool_use")
+               (id . ,(llm-provider-utils-function-call-id call))
+               (name . ,(llm-provider-utils-function-call-name call))
+               (input . ,(llm-provider-utils-function-call-args call))))
+           calls)))
 
 (cl-defmethod llm-provider-chat-extract-result ((_ llm-claude) response)
   (let ((content (aref (assoc-default 'content response) 0)))
@@ -147,6 +155,16 @@
 (cl-defmethod llm-capabilities ((_ llm-claude))
   (list 'streaming 'function-calls))
 
+(cl-defmethod llm-provider-append-to-prompt ((_ llm-claude) prompt result
+                                             &optional func-results)
+  ;; Claude doesn't have a 'function role, so we just always use assistant here.
+  ;; But if it's a function result, it considers that a 'user response, which
+  ;; needs to be sent back.
+  (llm-provider-utils-append-to-prompt prompt result func-results (if func-results
+                                                                      'user
+                                                                    'assistant)))
+
+
 (provide 'llm-claude)
 
 ;;; llm-claude.el ends here
diff --git a/llm-integration-test.el b/llm-integration-test.el
index 087f61d..6a02ccc 100644
--- a/llm-integration-test.el
+++ b/llm-integration-test.el
@@ -89,68 +89,67 @@
       (dolist (model (split-string (getenv "OLLAMA_CHAT_MODELS") ", "))
         (push (make-llm-ollama :chat-model model) providers)))))
 
-(ert-deftest llm-chat ()
-  (dolist (provider (llm-integration-test-providers))
-    (let ((llm-warn-on-nonfree nil))
-      (ert-info ((format "Using provider %s" (llm-name provider)))
-        (should (equal
-                 (llm-chat
-                  provider
-                  (llm-make-chat-prompt llm-integration-test-chat-prompt))
-                 llm-integration-test-chat-answer))))))
-
-(ert-deftest llm-chat-async ()
-  (dolist (provider (llm-integration-test-providers))
-    (ert-info ((format "Using provider %s" (llm-name provider)))
-      (let ((result nil)
-            (buf (current-buffer))
-            (llm-warn-on-nonfree nil))
-        (llm-chat-async
-         provider
-         (llm-make-chat-prompt llm-integration-test-chat-prompt)
-         (lambda (response)
-           (should (eq (current-buffer) buf))
-           (setq result response))
-         (lambda (error)
-           (error "Error: %s" error)))
-        (while (null result)
-          (sleep-for 0.1))
-        (should (equal result llm-integration-test-chat-answer))))))
-
-(ert-deftest llm-chat-streaming ()
-  (dolist (provider (seq-filter
-                     (lambda (provider)
-                       (member 'streaming (llm-capabilities provider)))
-                     (llm-integration-test-providers)))
-    (ert-info ((format "Using provider %s" (llm-name provider)))
-      (let ((streamed-result "")
-            (returned-result nil)
-            (llm-warn-on-nonfree nil)
-            (buf (current-buffer))
-            (start-time (current-time)))
-        (llm-chat-streaming
-         provider
-         (llm-make-chat-prompt llm-integration-test-chat-prompt)
-         (lambda (partial-response)
-           (should (eq (current-buffer) buf))
-           (setq streamed-result (concat streamed-result partial-response)))
-         (lambda (response)
-           (should (eq (current-buffer) buf))
-           (setq returned-result response))
-         (lambda (error)
-           (error "Error: %s" error)))
-        (while (and (null returned-result)
-                    (time-less-p (time-subtract (current-time) start-time) 10))
-          (sleep-for 0.1))
-        (should (equal returned-result llm-integration-test-chat-answer))
-        (should (equal streamed-result llm-integration-test-chat-answer))))))
-
-(ert-deftest llm-function-call ()
-  (dolist (provider (llm-integration-test-providers))
-    (let ((llm-warn-on-nonfree nil))
-      (ert-info ((format "Using provider %s" (llm-name provider)))
-        (should (equal
-                 (llm-chat provider (llm-integration-test-fc-prompt))
-                 llm-integration-test-fc-answer))))))
+(defmacro llm-def-integration-test (name arglist &rest body)
+  "Define an integration test."
+  (declare (indent defun))
+  `(ert-deftest ,name ()
+     (dolist (,(car arglist) (llm-integration-test-providers))
+       (ert-info ((format "Using provider %s" (llm-name provider)))
+         ,@body))))
+
+(llm-def-integration-test llm-chat (provider)
+  (should (equal
+           (llm-chat
+            provider
+            (llm-make-chat-prompt llm-integration-test-chat-prompt))
+           llm-integration-test-chat-answer)))
+
+(llm-def-integration-test llm-chat-async (provider)
+  (let ((result nil)
+        (buf (current-buffer))
+        (llm-warn-on-nonfree nil))
+    (llm-chat-async
+     provider
+     (llm-make-chat-prompt llm-integration-test-chat-prompt)
+     (lambda (response)
+       (should (eq (current-buffer) buf))
+       (setq result response))
+     (lambda (error)
+       (error "Error: %s" error)))
+    (while (null result)
+      (sleep-for 0.1))
+    (should (equal result llm-integration-test-chat-answer))))
+
+(llm-def-integration-test llm-chat-streaming (provider)
+  (when (member 'streaming (llm-capabilities provider))
+    (let ((streamed-result "")
+          (returned-result nil)
+          (llm-warn-on-nonfree nil)
+          (buf (current-buffer))
+          (start-time (current-time)))
+      (llm-chat-streaming
+       provider
+       (llm-make-chat-prompt llm-integration-test-chat-prompt)
+       (lambda (partial-response)
+         (should (eq (current-buffer) buf))
+         (setq streamed-result (concat streamed-result partial-response)))
+       (lambda (response)
+         (should (eq (current-buffer) buf))
+         (setq returned-result response))
+       (lambda (error)
+         (error "Error: %s" error)))
+      (while (and (null returned-result)
+                  (time-less-p (time-subtract (current-time) start-time) 10))
+        (sleep-for 0.1))
+      (should (equal returned-result llm-integration-test-chat-answer))
+      (should (equal streamed-result llm-integration-test-chat-answer)))))
+
+(llm-def-integration-test llm-function-call (provider)
+  (should (equal
+           (llm-chat provider (llm-integration-test-fc-prompt))
+           llm-integration-test-fc-answer))
+  ;; Test that we can send the function back to the provider without error.
+  (llm-chat provider (llm-integration-test-fc-prompt)))
+
 
 (provide 'llm-integration-test)
diff --git a/llm-provider-utils.el b/llm-provider-utils.el
index eadd596..91c8535 100644
--- a/llm-provider-utils.el
+++ b/llm-provider-utils.el
@@ -197,7 +197,7 @@ function call results, return a list of
 
 (cl-defgeneric llm-provider-populate-function-calls (provider prompt calls)
   "For PROVIDER, in PROMPT, record function call execution.
-This is the recording before the calls were executed.
+This is the recording before the calls were executed, in the prompt.
 CALLS are a list of `llm-provider-utils-function-call'.")
 
 (cl-defgeneric llm-provider-collect-streaming-function-data (provider data)
@@ -516,10 +516,13 @@ ROLE will be `assistant' by default, but can be passed in for other roles."
   (setf (llm-chat-prompt-interactions prompt)
         (append (llm-chat-prompt-interactions prompt)
                 (list (make-llm-chat-prompt-interaction
-                       :role (if func-results
-                                 'function
-                               (or role 'assistant))
-                       :content output
+                       :role (or role
+                                 (if func-results 'function 'assistant))
+                       ;; If it is a structure, it will get converted to JSON,
+                       ;; otherwise make sure it is a string.
+                       :content (if (listp output)
+                                    output
+                                  (format "%s" output))
                        :function-call-result func-results)))))
 
 (cl-defstruct llm-provider-utils-function-call
diff --git a/llm-tester.el b/llm-tester.el
index 5dcfd0d..e524cb3 100644
--- a/llm-tester.el
+++ b/llm-tester.el
@@ -280,9 +280,6 @@ of by calling the `describe_function' function."
   (let ((prompt (llm-tester-create-test-function-prompt))
         (responses nil))
     (push (llm-chat provider prompt) responses)
-    ;; The expectation (a requirement for Gemini) is we call back into the LLM
-    ;; with the results of the previous call to get a text response based on the
-    ;; function call results.
     (push (llm-chat provider prompt) responses)
     (llm-chat-prompt-append-response prompt "I'm now looking for a function that will return the directory of a filename")
     (push (llm-chat provider prompt) responses)
diff --git a/llm.el b/llm.el
index 292a718..86a5e0d 100644
--- a/llm.el
+++ b/llm.el
@@ -343,9 +343,6 @@ be passed to `llm-cancel-request'."
                                 new-error-callback)))
     result))
 
-(cl-defmethod llm-chat-function-call ((_ (eql nil)) _ _ _)
-  (error "LLM provider was nil. Please set the provider in the application you are using"))
-
 (cl-defgeneric llm-chat-streaming (provider prompt partial-callback response-callback error-callback)
   "Stream a response to PROMPT from PROVIDER.
 PROMPT is a `llm-chat-prompt'.
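
Illustration (not part of the patch): the README change above says that after a
function-calling llm-chat call, the client should simply resend the unmodified
prompt to get the textual follow-up. The sketch below shows that flow with the
llm API, assuming a configured llm-claude provider; the placeholder API key and
the get_weather function are invented for this example.

;; A minimal sketch of a function-calling conversation (illustrative only).
(require 'llm)
(require 'llm-claude)

(let* ((provider (make-llm-claude :key "CLAUDE-API-KEY")) ; placeholder key
       (prompt (llm-make-chat-prompt
                "What is the weather in Paris?"
                :functions
                (list (make-llm-function-call
                       :name "get_weather"
                       :function (lambda (city) (format "Sunny in %s" city))
                       :description "Get the current weather for a city."
                       :args (list (make-llm-function-arg
                                    :name "city"
                                    :type 'string
                                    :description "City name."
                                    :required t)))))))
  ;; First call: the LLM replies with a function call; the library executes it,
  ;; records the result in PROMPT, and returns an alist of function names and
  ;; return values.
  (llm-chat provider prompt)
  ;; Second call: resend the prompt unchanged to get the textual response that
  ;; accompanies the function call results.
  (llm-chat provider prompt))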