Make sure every model, even unknown ones, has some context size #54

Merged · 4 commits · Jul 26, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
@@ -5,7 +5,7 @@ on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
branches: [ "*" ]

jobs:
test:
2 changes: 2 additions & 0 deletions NEWS.org
@@ -1,3 +1,5 @@
* Version 0.17.1
- Make sure every model, even unknown models, returns some value for ~llm-chat-token-limit~.
* Version 0.17.0
- Introduced =llm-prompt= for prompt management and creation from generators.
- Removed Gemini and Vertex token counting, because =llm-prompt= uses token
5 changes: 4 additions & 1 deletion llm-openai.el
@@ -259,7 +259,7 @@ RESPONSE can be nil if the response is complete."
;; models, but not for 32k models.
(+ (* n 1024) (if (= n 16) 1 0))))
((equal model "gpt-4") 8192)
((equal model "gpt-4o") 30000)
((string-match-p "gpt-4o" model) 30000)
((string-match-p (rx (seq "gpt-4-" (+ ascii) "-preview")) model)
128000)
((string-match-p (rx (seq "gpt-4-" (+ digit))) model)
@@ -270,6 +270,9 @@
4096)
(t 4096))))

(cl-defmethod llm-chat-token-limit ((provider llm-openai-compatible))
(llm-provider-utils-model-token-limit (llm-openai-chat-model provider)))

(cl-defmethod llm-capabilities ((_ llm-openai))
(list 'streaming 'embeddings 'function-calls))

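With the switch from `equal' to `string-match-p', any OpenAI model name containing "gpt-4o" now resolves to the 30000 limit, and anything unrecognized falls through to the 4096 catch-all. A quick sketch of the resulting behavior, inferred from the cond above and confirmed by the tests below:

(llm-chat-token-limit (make-llm-openai :chat-model "gpt-4o"))      ; => 30000
(llm-chat-token-limit (make-llm-openai :chat-model "gpt-4o-mini")) ; => 30000
(llm-chat-token-limit (make-llm-openai :chat-model "unknown"))     ; => 4096
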
5 changes: 4 additions & 1 deletion llm-provider-utils.el
@@ -432,7 +432,10 @@ conversation history will follow."
((string-match-p "orca" model) 2048)
((string-match-p "llama\s*2" model) 4096)
((string-match-p "llama" model) 2048)
((string-match-p "starcoder" model) 8192))))
((string-match-p "starcoder" model) 8192)
((string-match-p "gemma" model) 8192)
;; Default to the smallest context window, 2048.
(t 2048))))

(defun llm-provider-utils-openai-arguments (args)
"Convert ARGS to the Open AI function calling spec.
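The shared lookup in `llm-provider-utils-model-token-limit' now recognizes gemma and ends in a catch-all, so every caller gets a number back. A minimal sketch, assuming the surrounding function takes the model name as a plain string, as the cond above suggests:

(llm-provider-utils-model-token-limit "gemma:7b")        ; matches "gemma" => 8192
(llm-provider-utils-model-token-limit "my-custom-model") ; no match => the 2048 floor
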
21 changes: 15 additions & 6 deletions llm-test.el
@@ -81,7 +81,8 @@
(cl-flet* ((token-limit-for (model)
(llm-chat-token-limit (make-llm-openai :chat-model model)))
(should-have-token-limit (model limit)
(should (equal limit (token-limit-for model)))))
(ert-info ((format "Testing %s" model))
(should (equal limit (token-limit-for model))))))
;; From https://platform.openai.com/docs/models/gpt-3-5
(should-have-token-limit "gpt-3.5-turbo-1106" 16385)
(should-have-token-limit "gpt-3.5-turbo" 4096)
@@ -98,31 +99,39 @@
(should-have-token-limit "gpt-4-32k" 32768)
(should-have-token-limit "gpt-4-0613" 8192)
(should-have-token-limit "gpt-4-32k-0613" 32768)
(should-have-token-limit "gpt-4o" 30000)))
(should-have-token-limit "gpt-4o" 30000)
(should-have-token-limit "gpt-4o-mini" 30000)
(should-have-token-limit "unknown" 4096)))

(ert-deftest llm-test-chat-token-limit-gemini ()
(should (= 30720 (llm-chat-token-limit (make-llm-gemini))))
(should (= 12288 (llm-chat-token-limit
(make-llm-gemini :chat-model "gemini-pro-vision"))))
(should (= 1048576 (llm-chat-token-limit
(make-llm-gemini :chat-model "gemini-1.5-flash")))))
(make-llm-gemini :chat-model "gemini-1.5-flash"))))
(should (= 2048 (llm-chat-token-limit
(make-llm-gemini :chat-model "unknown")))))

(ert-deftest llm-test-chat-token-limit-vertex ()
(should (= 30720 (llm-chat-token-limit (make-llm-vertex))))
(should (= 12288 (llm-chat-token-limit
(make-llm-vertex :chat-model "gemini-pro-vision"))))
(should (= 1048576 (llm-chat-token-limit
(make-llm-vertex :chat-model "gemini-1.5-flash")))))
(make-llm-vertex :chat-model "gemini-1.5-flash"))))
(should (= 2048 (llm-chat-token-limit
(make-llm-vertex :chat-model "unknown")))))

(ert-deftest llm-test-chat-token-limit-ollama ()
;; The code is straightforward, so no need to test all the models.
(should (= 8192 (llm-chat-token-limit
(make-llm-ollama :chat-model "mistral:latest")))))
(make-llm-ollama :chat-model "mistral:latest"))))
(should (= 2048 (llm-chat-token-limit
(make-llm-ollama :chat-model "unknown")))))

(ert-deftest llm-test-chat-token-limit-gpt4all ()
;; The code is straightforward, so no need to test all the models.
(should (= 8192 (llm-chat-token-limit
(make-llm-ollama :chat-model "Mistral")))))
(make-llm-gpt4all :chat-model "Mistral")))))

(provide 'llm-test)
;;; llm-test.el ends here
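
Two notes on the test changes: wrapping each assertion in `ert-info' makes a failing run name the model that broke, and the new "unknown" cases pin down the default limits. To exercise just these tests, something like the following should work (load paths assumed):

;; Interactively, run every token-limit test by name regexp:
(ert "llm-test-chat-token-limit-")
;; Or in batch from a checkout:
;; emacs -Q -batch -L . -l llm-test.el \
;;   --eval '(ert-run-tests-batch-and-exit "llm-test-chat-token-limit-")'
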
6 changes: 2 additions & 4 deletions llm-vertex.el
@@ -283,10 +283,8 @@ If STREAMING is non-nil, use the URL for the streaming API."
(cond ((equal "gemini-pro" model) 30720)
((equal "gemini-pro-vision" model) 12288)
((string-match-p (rx (seq "gemini-1.5")) model) 1048576)
;; This shouldn't happen unless there's a new model, which could be a
;; smaller or larger model. We'll play it safe and choose a reasonable
;; number.
(t 4096)))
;; Vertex can run different models, so check the standard model names.
(t (llm-provider-utils-model-token-limit model))))

(cl-defmethod llm-chat-token-limit ((provider llm-vertex))
(llm-vertex--chat-token-limit (llm-vertex-chat-model provider)))
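Net effect for Vertex: the known Gemini models keep their documented limits, while any other model routes through the shared lookup instead of a hard-coded guess, bottoming out at 2048, per the tests above:

(llm-chat-token-limit (make-llm-vertex :chat-model "unknown")) ; => 2048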