Make sure every model, even unknown ones, has some context size #54

Merged · 4 commits · Jul 26, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
@@ -5,7 +5,7 @@ on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
branches: [ "*" ]

jobs:
test:
2 changes: 2 additions & 0 deletions NEWS.org
@@ -1,3 +1,5 @@
* Version 0.17.1
- Make sure every model, even unknown models, returns some value for ~llm-chat-token-limit~.
* Version 0.17.0
- Introduced =llm-prompt= for prompt management and creation from generators.
- Removed Gemini and Vertex token counting, because =llm-prompt= uses token
5 changes: 4 additions & 1 deletion llm-openai.el
@@ -259,7 +259,7 @@ RESPONSE can be nil if the response is complete."
;; models, but not for 32k models.
(+ (* n 1024) (if (= n 16) 1 0))))
((equal model "gpt-4") 8192)
((equal model "gpt-4o") 30000)
((string-match-p "gpt-4o" model) 30000)
((string-match-p (rx (seq "gpt-4-" (+ ascii) "-preview")) model)
128000)
((string-match-p (rx (seq "gpt-4-" (+ digit))) model)
@@ -270,6 +270,9 @@
4096)
(t 4096))))

(cl-defmethod llm-chat-token-limit ((provider llm-openai-compatible))
(llm-provider-utils-model-token-limit (llm-openai-chat-model provider)))

(cl-defmethod llm-capabilities ((_ llm-openai))
(list 'streaming 'embeddings 'function-calls))

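With the switch from `equal' to `string-match-p', any OpenAI model name containing "gpt-4o" now resolves to the 30000 limit, and anything unrecognized falls through to the 4096 catch-all. A quick sketch of the resulting behavior, inferred from the cond above and confirmed by the tests below:

(llm-chat-token-limit (make-llm-openai :chat-model "gpt-4o"))      ; => 30000
(llm-chat-token-limit (make-llm-openai :chat-model "gpt-4o-mini")) ; => 30000
(llm-chat-token-limit (make-llm-openai :chat-model "unknown"))     ; => 4096
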
5 changes: 4 additions & 1 deletion llm-provider-utils.el
@@ -432,7 +432,10 @@ conversation history will follow."
((string-match-p "orca" model) 2048)
((string-match-p "llama\s*2" model) 4096)
((string-match-p "llama" model) 2048)
((string-match-p "starcoder" model) 8192))))
((string-match-p "starcoder" model) 8192)
((string-match-p "gemma" model) 8192)
;; Default to the smallest context window, 2048.
(t 2048))))

(defun llm-provider-utils-openai-arguments (args)
"Convert ARGS to the Open AI function calling spec.
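The shared lookup in `llm-provider-utils-model-token-limit' now recognizes gemma and ends in a catch-all, so every caller gets a number back. A minimal sketch, assuming the surrounding function takes the model name as a plain string, as the cond above suggests:

(llm-provider-utils-model-token-limit "gemma:7b")        ; matches "gemma" => 8192
(llm-provider-utils-model-token-limit "my-custom-model") ; no match => the 2048 floor
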
21 changes: 15 additions & 6 deletions llm-test.el
@@ -81,7 +81,8 @@
(cl-flet* ((token-limit-for (model)
(llm-chat-token-limit (make-llm-openai :chat-model model)))
(should-have-token-limit (model limit)
(should (equal limit (token-limit-for model)))))
(ert-info ((format "Testing %s" model))
(should (equal limit (token-limit-for model))))))
;; From https://platform.openai.com/docs/models/gpt-3-5
(should-have-token-limit "gpt-3.5-turbo-1106" 16385)
(should-have-token-limit "gpt-3.5-turbo" 4096)
@@ -98,31 +99,39 @@
(should-have-token-limit "gpt-4-32k" 32768)
(should-have-token-limit "gpt-4-0613" 8192)
(should-have-token-limit "gpt-4-32k-0613" 32768)
(should-have-token-limit "gpt-4o" 30000)))
(should-have-token-limit "gpt-4o" 30000)
(should-have-token-limit "gpt-4o-mini" 30000)
(should-have-token-limit "unknown" 4096)))

(ert-deftest llm-test-chat-token-limit-gemini ()
(should (= 30720 (llm-chat-token-limit (make-llm-gemini))))
(should (= 12288 (llm-chat-token-limit
(make-llm-gemini :chat-model "gemini-pro-vision"))))
(should (= 1048576 (llm-chat-token-limit
(make-llm-gemini :chat-model "gemini-1.5-flash")))))
(make-llm-gemini :chat-model "gemini-1.5-flash"))))
(should (= 2048 (llm-chat-token-limit
(make-llm-gemini :chat-model "unknown")))))

(ert-deftest llm-test-chat-token-limit-vertex ()
(should (= 30720 (llm-chat-token-limit (make-llm-vertex))))
(should (= 12288 (llm-chat-token-limit
(make-llm-vertex :chat-model "gemini-pro-vision"))))
(should (= 1048576 (llm-chat-token-limit
(make-llm-vertex :chat-model "gemini-1.5-flash")))))
(make-llm-vertex :chat-model "gemini-1.5-flash"))))
(should (= 2048 (llm-chat-token-limit
(make-llm-vertex :chat-model "unknown")))))

(ert-deftest llm-test-chat-token-limit-ollama ()
;; The code is straightforward, so no need to test all the models.
(should (= 8192 (llm-chat-token-limit
(make-llm-ollama :chat-model "mistral:latest")))))
(make-llm-ollama :chat-model "mistral:latest"))))
(should (= 2048 (llm-chat-token-limit
(make-llm-ollama :chat-model "unknown")))))

(ert-deftest llm-test-chat-token-limit-gpt4all ()
;; The code is straightforward, so no need to test all the models.
(should (= 8192 (llm-chat-token-limit
(make-llm-ollama :chat-model "Mistral")))))
(make-llm-gpt4all :chat-model "Mistral")))))

(provide 'llm-test)
;;; llm-test.el ends here
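
Two notes on the test changes: wrapping each assertion in `ert-info' makes a failing run name the model that broke, and the new "unknown" cases pin down the default limits. To exercise just these tests, something like the following should work (load paths assumed):

;; Interactively, run every token-limit test by name regexp:
(ert "llm-test-chat-token-limit-")
;; Or in batch from a checkout:
;; emacs -Q -batch -L . -l llm-test.el \
;;   --eval '(ert-run-tests-batch-and-exit "llm-test-chat-token-limit-")'
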
6 changes: 2 additions & 4 deletions llm-vertex.el
@@ -283,10 +283,8 @@ If STREAMING is non-nil, use the URL for the streaming API."
(cond ((equal "gemini-pro" model) 30720)
((equal "gemini-pro-vision" model) 12288)
((string-match-p (rx (seq "gemini-1.5")) model) 1048576)
;; This shouldn't happen unless there's a new model, which could be a
;; smaller or larger model. We'll play it safe and choose a reasonable
;; number.
(t 4096)))
;; Vertex can run different models, so check the standard model names.
(t (llm-provider-utils-model-token-limit model))))

(cl-defmethod llm-chat-token-limit ((provider llm-vertex))
(llm-vertex--chat-token-limit (llm-vertex-chat-model provider)))
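Net effect for Vertex: the known Gemini models keep their documented limits, while any other model routes through the shared lookup instead of a hard-coded guess, bottoming out at 2048, per the tests above:

(llm-chat-token-limit (make-llm-vertex :chat-model "unknown")) ; => 2048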