Fix function calling conversations for Claude (#68)
This fixes an issue where we sent Claude something unsupported in
function-call conversations, and also gives the client a way to handle
conversations with functions that respects differences in how the LLMs expect
to be called.

Also make the integration tests cleaner via macros

Fixes #62
ahyatt authored Aug 22, 2024
1 parent 291c56d commit 8a91a6d
Showing 7 changed files with 112 additions and 93 deletions.
3 changes: 2 additions & 1 deletion NEWS.org
@@ -1,9 +1,10 @@
* Version 0.17.2
- Fix compiled functions not being evaluated in =llm-prompt=.
- Use Ollama's new =embed= API instead of the obsolete one.
- Fix Claude function calling conversations.
- Fix issue in Open AI streaming function calling.
- Update Open AI and Claude default chat models to the latest models.
* Version 0.17.1
- Support Ollama function calling, for models which support it.
- Make sure every model, even unknown models, return some value for ~llm-chat-token-limit~.
- Add token count for llama3.1 model.
12 changes: 8 additions & 4 deletions README.org
@@ -160,10 +160,14 @@ with the arguments supplied by the LLM. Instead of returning (or passing to a
callback) a string, an alist of function names and return values will be
returned.
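
For example, a call that triggers a hypothetical =multiply= function might
return something like this (an illustrative sketch, not actual output):

#+begin_src emacs-lisp
  ;; An alist mapping each called function's name to its return value.
  '(("multiply" . 6))
#+end_src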

The client must then send this back to the LLM, to get a textual response from
the LLM based on the results of the function call. These have already been added
to the prompt, so the client only has to call the LLM again. Gemini and Vertex
require this extra call to the LLM, but Open AI does not.
After sending a function call, the client could use the result directly, but if
you want to proceed with the conversation, or get a textual response that
accompanies the function call, you should just send the prompt back with no
modifications. This is because the LLM gives the function call to make as its
response, and then expects to get back the results of that call. The functions
were already executed at the end of the previous call, which also stored their
results in the prompt. This is why the prompt should be sent back without
further modifications.
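
For illustration, a minimal sketch of this flow (assuming a provider object
=provider= and a hypothetical function definition =my-multiply-function=):

#+begin_src emacs-lisp
  ;; Illustrative sketch only; the function definition is a placeholder.
  (let ((prompt (llm-make-chat-prompt
                 "What is 2 times 3?"
                 :functions (list my-multiply-function))))
    ;; First call: the LLM requests the function, which is executed for us;
    ;; an alist of function names and results is returned.
    (llm-chat provider prompt)
    ;; Second call: send the same prompt back unchanged to get the textual
    ;; response that incorporates the function results.
    (llm-chat provider prompt))
#+end_src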

Be aware that there is no guarantee that the function will be called correctly.
While the LLMs mostly get this right, they are trained on JavaScript functions,
46 changes: 32 additions & 14 deletions llm-claude.el
@@ -57,16 +57,19 @@
("max_tokens" . ,(or (llm-chat-prompt-max-tokens prompt) 4096))
("messages" .
,(mapcar (lambda (interaction)
(append
`(("role" . ,(pcase (llm-chat-prompt-interaction-role interaction)
('function 'user)
('assistant 'assistant)
('user 'user)))
("content" . ,(or (llm-chat-prompt-interaction-content interaction)
(llm-chat-prompt-function-call-result-result
(llm-chat-prompt-interaction-function-call-result interaction)))))
(when-let ((r (llm-chat-prompt-interaction-function-call-result interaction)))
`(("tool_use_id" . ,(llm-chat-prompt-function-call-result-call-id r))))))
`(("role" . ,(pcase (llm-chat-prompt-interaction-role interaction)
('function 'user)
('assistant 'assistant)
('user 'user)))
("content" .
,(if (llm-chat-prompt-interaction-function-call-result interaction)
`((("type" . "tool_result")
("tool_use_id" .
,(llm-chat-prompt-function-call-result-call-id
(llm-chat-prompt-interaction-function-call-result interaction)))
("content" .
,(llm-chat-prompt-interaction-content interaction))))
(llm-chat-prompt-interaction-content interaction)))))
(llm-chat-prompt-interactions prompt)))))
(system (llm-provider-utils-get-system-prompt prompt)))
(when (llm-chat-prompt-functions prompt)
@@ -87,10 +90,15 @@
:name (assoc-default 'name item)
:args (assoc-default 'input item)))))

(cl-defmethod llm-provider-populate-function-calls ((_ llm-claude) _ _)
;; Claude does not need to be sent back the function calls it sent in the
;; first place.
nil)
(cl-defmethod llm-provider-populate-function-calls ((_ llm-claude) prompt calls)
(llm-provider-utils-append-to-prompt
prompt
(mapcar (lambda (call)
`((type . "tool_use")
(id . ,(llm-provider-utils-function-call-id call))
(name . ,(llm-provider-utils-function-call-name call))
(input . ,(llm-provider-utils-function-call-args call))))
calls)))
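
;; For illustration (a hedged sketch; the id, name, and argument values below
;; are made-up examples): each function call is appended to the prompt as an
;; alist like the following, which later serializes to Claude's "tool_use"
;; content-block JSON.
;;
;; '((type . "tool_use")
;;   (id . "toolu_01ExampleId")
;;   (name . "multiply")
;;   (input . ((a . 2) (b . 3))))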

(cl-defmethod llm-provider-chat-extract-result ((_ llm-claude) response)
(let ((content (aref (assoc-default 'content response) 0)))
@@ -147,6 +155,16 @@
(cl-defmethod llm-capabilities ((_ llm-claude))
(list 'streaming 'function-calls))

(cl-defmethod llm-provider-append-to-prompt ((_ llm-claude) prompt result
&optional func-results)
;; Claude doesn't have a 'function role, so we just always use assistant here.
;; But if it's a function result, it considers that a 'user response, which
;; needs to be sent back.
(llm-provider-utils-append-to-prompt prompt result func-results (if func-results
'user
'assistant)))


(provide 'llm-claude)

;;; llm-claude.el ends here
125 changes: 62 additions & 63 deletions llm-integration-test.el
@@ -89,68 +89,67 @@
(dolist (model (split-string (getenv "OLLAMA_CHAT_MODELS") ", "))
(push (make-llm-ollama :chat-model model) providers)))))

(ert-deftest llm-chat ()
(dolist (provider (llm-integration-test-providers))
(let ((llm-warn-on-nonfree nil))
(ert-info ((format "Using provider %s" (llm-name provider)))
(should (equal
(llm-chat
provider
(llm-make-chat-prompt llm-integration-test-chat-prompt))
llm-integration-test-chat-answer))))))

(ert-deftest llm-chat-async ()
(dolist (provider (llm-integration-test-providers))
(ert-info ((format "Using provider %s" (llm-name provider)))
(let ((result nil)
(buf (current-buffer))
(llm-warn-on-nonfree nil))
(llm-chat-async
provider
(llm-make-chat-prompt llm-integration-test-chat-prompt)
(lambda (response)
(should (eq (current-buffer) buf))
(setq result response))
(lambda (error)
(error "Error: %s" error)))
(while (null result)
(sleep-for 0.1))
(should (equal result llm-integration-test-chat-answer))))))

(ert-deftest llm-chat-streaming ()
(dolist (provider (seq-filter
(lambda (provider)
(member 'streaming (llm-capabilities provider)))
(llm-integration-test-providers)))
(ert-info ((format "Using provider %s" (llm-name provider)))
(let ((streamed-result "")
(returned-result nil)
(llm-warn-on-nonfree nil)
(buf (current-buffer))
(start-time (current-time)))
(llm-chat-streaming
provider
(llm-make-chat-prompt llm-integration-test-chat-prompt)
(lambda (partial-response)
(should (eq (current-buffer) buf))
(setq streamed-result (concat streamed-result partial-response)))
(lambda (response)
(should (eq (current-buffer) buf))
(setq returned-result response))
(lambda (error)
(error "Error: %s" error)))
(while (and (null returned-result)
(time-less-p (time-subtract (current-time) start-time) 10))
(sleep-for 0.1))
(should (equal returned-result llm-integration-test-chat-answer))
(should (equal streamed-result llm-integration-test-chat-answer))))))

(ert-deftest llm-function-call ()
(dolist (provider (llm-integration-test-providers))
(let ((llm-warn-on-nonfree nil))
(ert-info ((format "Using provider %s" (llm-name provider)))
(should (equal
(llm-chat provider (llm-integration-test-fc-prompt))
llm-integration-test-fc-answer))))))
(defmacro llm-def-integration-test (name arglist &rest body)
  "Define an integration test NAME.
The first element of ARGLIST is bound to each provider under test
in turn, and BODY is evaluated once per provider."
  (declare (indent defun))
  `(ert-deftest ,name ()
     (dolist (,(car arglist) (llm-integration-test-providers))
       (ert-info ((format "Using provider %s" (llm-name ,(car arglist))))
         ,@body))))
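
;; For reference, a rough sketch of the expansion (illustrative only): a form
;; like (llm-def-integration-test llm-chat (provider) BODY...) expands to
;; approximately:
;;
;;   (ert-deftest llm-chat ()
;;     (dolist (provider (llm-integration-test-providers))
;;       (ert-info ((format "Using provider %s" (llm-name provider)))
;;         BODY...)))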

(llm-def-integration-test llm-chat (provider)
(should (equal
(llm-chat
provider
(llm-make-chat-prompt llm-integration-test-chat-prompt))
llm-integration-test-chat-answer)))

(llm-def-integration-test llm-chat-async (provider)
(let ((result nil)
(buf (current-buffer))
(llm-warn-on-nonfree nil))
(llm-chat-async
provider
(llm-make-chat-prompt llm-integration-test-chat-prompt)
(lambda (response)
(should (eq (current-buffer) buf))
(setq result response))
(lambda (error)
(error "Error: %s" error)))
(while (null result)
(sleep-for 0.1))
(should (equal result llm-integration-test-chat-answer))))

(llm-def-integration-test llm-chat-streaming (provider)
(when (member 'streaming (llm-capabilities provider))
(let ((streamed-result "")
(returned-result nil)
(llm-warn-on-nonfree nil)
(buf (current-buffer))
(start-time (current-time)))
(llm-chat-streaming
provider
(llm-make-chat-prompt llm-integration-test-chat-prompt)
(lambda (partial-response)
(should (eq (current-buffer) buf))
(setq streamed-result (concat streamed-result partial-response)))
(lambda (response)
(should (eq (current-buffer) buf))
(setq returned-result response))
(lambda (error)
(error "Error: %s" error)))
(while (and (null returned-result)
(time-less-p (time-subtract (current-time) start-time) 10))
(sleep-for 0.1))
(should (equal returned-result llm-integration-test-chat-answer))
(should (equal streamed-result llm-integration-test-chat-answer)))))

(llm-def-integration-test llm-function-call (provider)
(should (equal
(llm-chat provider (llm-integration-test-fc-prompt))
llm-integration-test-fc-answer))
;; Test that we can send the function back to the provider without error.
(llm-chat provider (llm-integration-test-fc-prompt)))


(provide 'llm-integration-test)
13 changes: 8 additions & 5 deletions llm-provider-utils.el
Expand Up @@ -197,7 +197,7 @@ function call results, return a list of

(cl-defgeneric llm-provider-populate-function-calls (provider prompt calls)
"For PROVIDER, in PROMPT, record function call execution.
This is the recording before the calls were executed.
This is the recording, in the prompt, before the calls were executed.
CALLS are a list of `llm-provider-utils-function-call'.")

(cl-defgeneric llm-provider-collect-streaming-function-data (provider data)
@@ -516,10 +516,13 @@ ROLE will be `assistant' by default, but can be passed in for other roles.
(setf (llm-chat-prompt-interactions prompt)
(append (llm-chat-prompt-interactions prompt)
(list (make-llm-chat-prompt-interaction
:role (if func-results
'function
(or role 'assistant))
:content output
:role (or role
(if func-results 'function 'assistant))
;; If it is a structure, it will get converted to JSON,
;; otherwise make sure it is a string.
:content (if (listp output)
output
(format "%s" output))
:function-call-result func-results)))))

(cl-defstruct llm-provider-utils-function-call
3 changes: 0 additions & 3 deletions llm-tester.el
@@ -280,9 +280,6 @@ of by calling the `describe_function' function.
(let ((prompt (llm-tester-create-test-function-prompt))
(responses nil))
(push (llm-chat provider prompt) responses)
;; The expectation (a requirement for Gemini) is we call back into the LLM
;; with the results of the previous call to get a text response based on the
;; function call results.
(push (llm-chat provider prompt) responses)
(llm-chat-prompt-append-response prompt "I'm now looking for a function that will return the directory of a filename")
(push (llm-chat provider prompt) responses)
3 changes: 0 additions & 3 deletions llm.el
@@ -343,9 +343,6 @@ be passed to `llm-cancel-request'.
new-error-callback)))
result))

(cl-defmethod llm-chat-function-call ((_ (eql nil)) _ _ _)
(error "LLM provider was nil. Please set the provider in the application you are using"))

(cl-defgeneric llm-chat-streaming (provider prompt partial-callback response-callback error-callback)
"Stream a response to PROMPT from PROVIDER.
PROMPT is a `llm-chat-prompt'.
