From 21863523b4ab9377d3c687bc3b451362afd6177d Mon Sep 17 00:00:00 2001
From: Andrew Hyatt
Date: Sun, 28 Jul 2024 18:38:25 -0400
Subject: [PATCH] Add model specificity for embeddings capabilities for ollama

---
 NEWS.org      |  1 +
 README.org    |  2 +-
 llm-ollama.el | 11 ++++++++++-
 llm-test.el   | 17 ++++++++++++++++-
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/NEWS.org b/NEWS.org
index 90d8aa7..75a5153 100644
--- a/NEWS.org
+++ b/NEWS.org
@@ -2,6 +2,7 @@
 - Support Ollama function calling, for models which support it.
 - Make sure every model, even unknown models, return some value for ~llm-chat-token-limit~.
 - Add token count for llama3.1 model.
+- Make =llm-capabilities= work model-by-model for embeddings and functions.
 * Version 0.17.0
 - Introduced =llm-prompt= for prompt management and creation from generators.
 - Removed Gemini and Vertex token counting, because =llm-prompt= uses token
diff --git a/README.org b/README.org
index 623beec..82d14ae 100644
--- a/README.org
+++ b/README.org
@@ -66,7 +66,7 @@ In addition to the provider, which you may want multiple of (for example, to cha
 - ~:host~: The host that ollama is run on. This is optional and will default to localhost.
 - ~:port~: The port that ollama is run on. This is optional and will default to the default ollama port.
 - ~:chat-model~: The model name to use for chat. This is not optional for chat use, since there is no default.
-- ~:embedding-model~: The model name to use for embeddings. This is not optional for embedding use, since there is no default.
+- ~:embedding-model~: The model name to use for embeddings (only [[https://ollama.com/search?q=&c=embedding][some models]] can be used for embeddings). This is not optional for embedding use, since there is no default.
 ** GPT4All
 [[https://gpt4all.io/index.html][GPT4All]] is a way to run large language models locally. To use it with =llm= package, you must click "Enable API Server" in the settings. It does not offer embeddings or streaming functionality, though, so Ollama might be a better fit for users who are not already set up with local models. You can set it up with the following parameters:
 - ~:host~: The host that GPT4All is run on. This is optional and will default to localhost.
diff --git a/llm-ollama.el b/llm-ollama.el
index f89a888..9a00995 100644
--- a/llm-ollama.el
+++ b/llm-ollama.el
@@ -162,7 +162,16 @@ PROVIDER is the llm-ollama provider."
   (llm-provider-utils-model-token-limit (llm-ollama-chat-model provider)))
 
 (cl-defmethod llm-capabilities ((provider llm-ollama))
-  (append (list 'streaming 'embeddings)
+  (append (list 'streaming)
+          ;; See https://ollama.com/search?q=&c=embedding
+          (when (and (llm-ollama-embedding-model provider)
+                     (string-match
+                      (rx (or "nomic-embed-text"
+                              "mxbai-embed-large"
+                              "all-minilm"
+                              "snowflake-arctic-embed"))
+                      (llm-ollama-embedding-model provider)))
+            (list 'embeddings))
           ;; see https://ollama.com/search?c=tools
           (when (string-match
                  (rx (or "llama3.1" "mistral-nemo" "mistral-large"
diff --git a/llm-test.el b/llm-test.el
index 39c2a59..1b1ece4 100644
--- a/llm-test.el
+++ b/llm-test.el
@@ -145,7 +145,22 @@
     (should-not (has-fc "gemma"))
     (should-not (has-fc "gemma2"))
     (should-not (has-fc "llama2"))
-    (should-not (has-fc "llama"))))
+    (should-not (has-fc "llama"))
+    (should-not (has-fc "unknown"))))
+
+(ert-deftest llm-test-ollama-embedding-capabilities ()
+  ;; Tests are subject to change as models may gain embedding support.
+  (cl-flet ((has-emb (model)
+              (member 'embeddings
+                      (llm-capabilities (make-llm-ollama :embedding-model model
+                                                         :chat-model "mistral")))))
+    (should-not (has-emb "llama3.1"))
+    (should-not (has-emb "mistral"))
+    (should (has-emb "nomic-embed-text"))
+    (should (has-emb "mxbai-embed-large"))
+    (should-not (has-emb "mxbai-embed-small"))
+    (should-not (has-emb "unknown"))
+    (should-not (has-emb nil))))
 
 (provide 'llm-test)
 ;;; llm-test.el ends here
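
Usage note for reviewers: with this patch applied, ~llm-capabilities~ only advertises ~embeddings~ when the configured ~:embedding-model~ matches a known embedding-capable Ollama model, so callers should gate embedding requests on the reported capability. The snippet below is a minimal sketch of that pattern; the model names are illustrative values taken from the tests above, not defaults shipped with =llm=.

;; Minimal sketch (not part of the patch): only request an embedding
;; when the provider reports the `embeddings' capability.
(require 'llm)
(require 'llm-ollama)

(let ((provider (make-llm-ollama :chat-model "mistral"
                                 :embedding-model "nomic-embed-text")))
  (when (member 'embeddings (llm-capabilities provider))
    (llm-embedding provider "Example text to embed")))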