diff --git a/Dockerfile b/Dockerfile
index 16bbe3e0477a..761fe0aa9394 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -127,7 +127,7 @@ ENV \
     # Moonshot
     MOONSHOT_API_KEY="" MOONSHOT_PROXY_URL="" \
     # Novita
-    NOVITA_API_KEY="" \
+    NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \
     # Ollama
     OLLAMA_MODEL_LIST="" OLLAMA_PROXY_URL="" \
     # OpenAI
diff --git a/Dockerfile.database b/Dockerfile.database
index 82cc7238aad2..cdbe0cb1d553 100644
--- a/Dockerfile.database
+++ b/Dockerfile.database
@@ -168,7 +168,7 @@ ENV \
     # Moonshot
     MOONSHOT_API_KEY="" MOONSHOT_PROXY_URL="" \
     # Novita
-    NOVITA_API_KEY="" \
+    NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \
     # Ollama
     OLLAMA_MODEL_LIST="" OLLAMA_PROXY_URL="" \
     # OpenAI
diff --git a/src/config/llm.ts b/src/config/llm.ts
index f3c72e728166..8f3777ff2204 100644
--- a/src/config/llm.ts
+++ b/src/config/llm.ts
@@ -81,6 +81,7 @@ export const getLLMConfig = () => {
 
       ENABLED_NOVITA: z.boolean(),
       NOVITA_API_KEY: z.string().optional(),
+      NOVITA_MODEL_LIST: z.string().optional(),
 
       ENABLED_BAICHUAN: z.boolean(),
       BAICHUAN_API_KEY: z.string().optional(),
@@ -176,6 +177,7 @@ export const getLLMConfig = () => {
 
       ENABLED_NOVITA: !!process.env.NOVITA_API_KEY,
       NOVITA_API_KEY: process.env.NOVITA_API_KEY,
+      NOVITA_MODEL_LIST: process.env.NOVITA_MODEL_LIST,
 
       ENABLED_BAICHUAN: !!process.env.BAICHUAN_API_KEY,
      BAICHUAN_API_KEY: process.env.BAICHUAN_API_KEY,
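
Together, the hunks above thread a new `NOVITA_MODEL_LIST` environment variable through both Docker images and the zod-validated LLM config, mirroring the existing `*_MODEL_LIST` variables such as `OLLAMA_MODEL_LIST`. The snippet below is a hypothetical illustration of the resulting read path, not part of the diff; it assumes the variable uses the same comma-separated grammar as the other model-list variables (`-all` drops the defaults, `+id` adds a model, `id=Name` overrides a display name):

import { getLLMConfig } from '@/config/llm';

// e.g. NOVITA_MODEL_LIST="-all,+meta-llama/llama-3.1-8b-instruct"
const { ENABLED_NOVITA, NOVITA_MODEL_LIST } = getLLMConfig();
// ENABLED_NOVITA is true only when NOVITA_API_KEY is non-empty;
// NOVITA_MODEL_LIST is the raw, still-unparsed list string (or undefined).

Note that the provider is switched on by the API key alone; the model list only curates what an enabled provider exposes (see the globalConfig wiring at the end of this diff).
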
diff --git a/src/config/modelProviders/novita.ts b/src/config/modelProviders/novita.ts
index 586a95cb4713..1e5041dc4b9c 100644
--- a/src/config/modelProviders/novita.ts
+++ b/src/config/modelProviders/novita.ts
@@ -1,30 +1,47 @@
 import { ModelProviderCard } from '@/types/llm';
 
+// ref: https://novita.ai/model-api/product/llm-api
 const Novita: ModelProviderCard = {
   chatModels: [
     {
-      displayName: 'Llama3 8B Instruct',
+      displayName: 'Llama3.1 8B Instruct',
+      enabled: true,
+      id: 'meta-llama/llama-3.1-8b-instruct',
+      tokens: 8192,
+    },
+    {
+      displayName: 'Llama3.1 70B Instruct',
       enabled: true,
+      id: 'meta-llama/llama-3.1-70b-instruct',
+      tokens: 131_072,
+    },
+    {
+      displayName: 'Llama3.1 405B Instruct',
+      enabled: true,
+      id: 'meta-llama/llama-3.1-405b-instruct',
+      tokens: 32_768,
+    },
+    {
+      displayName: 'Llama3 8B Instruct',
       id: 'meta-llama/llama-3-8b-instruct',
       tokens: 8192,
     },
     {
       displayName: 'Llama3 70B Instruct',
-      enabled: true,
       id: 'meta-llama/llama-3-70b-instruct',
       tokens: 8192,
     },
     {
-      displayName: 'Nous Hermes 2 Pro - Llama3 8B',
+      displayName: 'Gemma 2 9B',
       enabled: true,
-      id: 'nousresearch/hermes-2-pro-llama-3-8b',
+      id: 'google/gemma-2-9b-it',
       tokens: 8192,
     },
     {
-      displayName: 'Nous Hermes - Llama2 8B',
+      displayName: 'Mistral Nemo',
       enabled: true,
-      id: 'nousresearch/nous-hermes-llama2-13b',
-      tokens: 4096,
+      id: 'mistralai/mistral-nemo',
+      tokens: 32_768,
     },
     {
       displayName: 'Mistral 7B Instruct',
@@ -33,55 +50,44 @@ const Novita: ModelProviderCard = {
       tokens: 32_768,
     },
     {
-      displayName: 'Dolphin Mixtral 8x22B',
+      displayName: 'WizardLM 2 7B',
       enabled: true,
-      id: 'cognitivecomputations/dolphin-mixtral-8x22b',
-      tokens: 16_000,
+      id: 'microsoft/wizardlm-2-7b',
+      tokens: 32_768,
     },
     {
-      displayName: 'L3-70b-Euryale-v2.1',
+      displayName: 'WizardLM-2 8x22B',
       enabled: true,
-      id: 'sao10k/l3-70b-euryale-v2.1',
-      tokens: 16_000,
+      id: 'microsoft/wizardlm-2-8x22b',
+      tokens: 65_535,
     },
     {
-      displayName: 'Midnight Rose 70B',
-      enabled: true,
-      id: 'sophosympatheia/midnight-rose-70b',
-      tokens: 4096,
+      displayName: 'Dolphin Mixtral 8x22B',
+      id: 'cognitivecomputations/dolphin-mixtral-8x22b',
+      tokens: 16_000,
     },
     {
-      displayName: 'Mythomax L2 13b',
-      enabled: true,
-      id: 'gryphe/mythomax-l2-13b',
-      tokens: 4096,
+      displayName: 'Hermes 2 Pro Llama 3 8B',
+      id: 'nousresearch/hermes-2-pro-llama-3-8b',
+      tokens: 8192,
     },
     {
-      displayName: 'Nous Hermes 2 - Mixtral 8x7B-DPO',
-      enabled: true,
+      displayName: 'Hermes 2 Mixtral 8x7B DPO',
       id: 'Nous-Hermes-2-Mixtral-8x7B-DPO',
       tokens: 32_768,
     },
     {
-      displayName: 'Lzlv 70b',
-      enabled: true,
-      id: 'lzlv_70b',
+      displayName: 'MythoMax l2 13B',
+      id: 'gryphe/mythomax-l2-13b',
       tokens: 4096,
     },
     {
-      displayName: 'Open Hermes 2.5 Mistral 7B',
-      enabled: true,
-      id: 'teknium/openhermes-2.5-mistral-7b',
+      displayName: 'OpenChat 7B',
+      id: 'openchat/openchat-7b',
       tokens: 4096,
     },
-    {
-      displayName: 'Wizardlm2 8x22B',
-      enabled: true,
-      id: 'microsoft/wizardlm-2-8x22b',
-      tokens: 65_535,
-    },
   ],
-  checkModel: 'meta-llama/llama-3-70b-instruct',
+  checkModel: 'meta-llama/llama-3.1-8b-instruct',
   disableBrowserRequest: true,
   id: 'novita',
   modelList: { showModelFetcher: true },
diff --git a/src/config/modelProviders/togetherai.ts b/src/config/modelProviders/togetherai.ts
index c5ec6d47d728..ccb07b002062 100644
--- a/src/config/modelProviders/togetherai.ts
+++ b/src/config/modelProviders/togetherai.ts
@@ -1,76 +1,161 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// ref https://docs.together.ai/docs/inference-models
+// ref https://docs.together.ai/docs/chat-models
+// ref https://www.together.ai/pricing
 const TogetherAI: ModelProviderCard = {
   chatModels: [
     {
-      displayName: 'Deepseek Coder Instruct (33B)',
+      displayName: 'Llama 3.1 8B Instruct Turbo',
       enabled: true,
-      id: 'deepseek-ai/deepseek-coder-33b-instruct',
-      tokens: 16_384,
+      id: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo',
+      tokens: 131_072,
     },
     {
-      displayName: 'Phind Code LLaMA v2 (34B)',
+      displayName: 'Llama 3.1 70B Instruct Turbo',
       enabled: true,
-      id: 'Phind/Phind-CodeLlama-34B-v2',
-      tokens: 16_384,
+      id: 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo',
+      tokens: 131_072,
     },
     {
-      displayName: 'Gemma Instruct (2B)',
+      displayName: 'Llama 3.1 405B Instruct Turbo',
       enabled: true,
-      id: 'google/gemma-2b-it',
+      id: 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo',
+      tokens: 8192,
+    },
+    {
+      displayName: 'Llama 3 8B Instruct Turbo',
+      id: 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo',
+      tokens: 8192,
+    },
+    {
+      displayName: 'Llama 3 70B Instruct Turbo',
+      id: 'meta-llama/Meta-Llama-3-70B-Instruct-Turbo',
+      tokens: 8192,
+    },
+    {
+      displayName: 'Llama 3 8B Instruct Lite',
+      id: 'meta-llama/Meta-Llama-3-8B-Instruct-Lite',
+      tokens: 8192,
+    },
+    {
+      displayName: 'Llama 3 70B Instruct Lite',
+      id: 'meta-llama/Meta-Llama-3-70B-Instruct-Lite',
+      tokens: 8192,
+    },
+    {
+      displayName: 'LLaMA-3 Chat (8B)',
+      id: 'meta-llama/Llama-3-8b-chat-hf',
+      tokens: 8192,
+    },
+    {
+      displayName: 'LLaMA-3 Chat (70B)',
+      id: 'meta-llama/Llama-3-70b-chat-hf',
       tokens: 8192,
     },
     {
       displayName: 'LLaMA-2 Chat (13B)',
-      enabled: true,
       id: 'meta-llama/Llama-2-13b-chat-hf',
       tokens: 4096,
     },
     {
-      displayName: '01-ai Yi Chat (34B)',
+      displayName: 'Gemma 2 9B',
       enabled: true,
-      id: 'zero-one-ai/Yi-34B-Chat',
-      tokens: 4096,
+      id: 'google/gemma-2-9b-it',
+      tokens: 8192,
     },
     {
-      displayName: 'Mixtral-8x7B Instruct (46.7B)',
+      displayName: 'Gemma 2 27B',
       enabled: true,
-      id: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
+      id: 'google/gemma-2-27b-it',
+      tokens: 8192,
+    },
+    {
+      displayName: 'Gemma Instruct (2B)',
+      id: 'google/gemma-2b-it',
+      tokens: 8192,
+    },
+    {
+      displayName: 'Mistral (7B) Instruct v0.3',
+      enabled: true,
+      id: 'mistralai/Mistral-7B-Instruct-v0.3',
       tokens: 32_768,
     },
     {
-      displayName: 'Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B)',
+      displayName: 'Mistral (7B) Instruct v0.2',
+      id: 'mistralai/Mistral-7B-Instruct-v0.2',
+      tokens: 32_768,
+    },
+    {
+      displayName: 'Mistral (7B) Instruct',
+      id: 'mistralai/Mistral-7B-Instruct-v0.1',
+      tokens: 8192,
+    },
+    {
+      displayName: 'Mixtral-8x7B Instruct (46.7B)',
       enabled: true,
-      id: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO',
+      id: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
       tokens: 32_768,
     },
     {
-      displayName: 'Nous Hermes-2 Yi (34B)',
+      displayName: 'Mixtral-8x22B Instruct (141B)',
       enabled: true,
-      id: 'NousResearch/Nous-Hermes-2-Yi-34B',
-      tokens: 4096,
+      id: 'mistralai/Mixtral-8x22B-Instruct-v0.1',
+      tokens: 65_536,
     },
     {
-      displayName: 'Qwen 1.5 Chat (7B)',
+      displayName: 'DeepSeek LLM Chat (67B)',
       enabled: true,
-      id: 'Qwen/Qwen1.5-7B-Chat',
-      tokens: 32_768,
+      id: 'deepseek-ai/deepseek-llm-67b-chat',
+      tokens: 4096,
     },
     {
-      displayName: 'Qwen 1.5 Chat (32B)',
+      displayName: 'Qwen 2 Instruct (72B)',
       enabled: true,
-      id: 'Qwen/Qwen1.5-32B-Chat',
+      id: 'Qwen/Qwen2-72B-Instruct',
       tokens: 32_768,
     },
     {
       displayName: 'Qwen 1.5 Chat (72B)',
-      enabled: true,
       id: 'Qwen/Qwen1.5-72B-Chat',
       tokens: 32_768,
     },
+    {
+      displayName: 'Qwen 1.5 Chat (110B)',
+      id: 'Qwen/Qwen1.5-110B-Chat',
+      tokens: 32_768,
+    },
+    {
+      displayName: 'DBRX Instruct',
+      id: 'databricks/dbrx-instruct',
+      tokens: 32_768,
+    },
+    {
+      displayName: 'Upstage SOLAR Instruct v1 (11B)',
+      id: 'upstage/SOLAR-10.7B-Instruct-v1.0',
+      tokens: 4096,
+    },
+    {
+      displayName: 'Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B)',
+      id: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO',
+      tokens: 32_768,
+    },
+    {
+      displayName: 'Nous Hermes-2 Yi (34B)',
+      id: 'NousResearch/Nous-Hermes-2-Yi-34B',
+      tokens: 4096,
+    },
+    {
+      displayName: 'MythoMax-L2 (13B)',
+      id: 'Gryphe/MythoMax-L2-13b',
+      tokens: 4096,
+    },
+    {
+      displayName: 'StripedHyena Nous (7B)',
+      id: 'togethercomputer/StripedHyena-Nous-7B',
+      tokens: 32_768,
+    },
   ],
-  checkModel: 'togethercomputer/alpaca-7b',
+  checkModel: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo',
   id: 'togetherai',
   modelList: { showModelFetcher: true },
   name: 'Together AI',
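
In both provider cards, `enabled: true` marks a model as visible by default, while entries without the flag stay in the card so the model fetcher (`modelList: { showModelFetcher: true }`) can still offer them. Each `checkModel` now points at a default-enabled Llama 3.1 8B model, so connectivity checks hit a model that is actually listed. A minimal sketch of reading the default-enabled set, using the `NovitaProviderCard` export that the globalConfig hunk below imports (the Together AI card works the same way):

import { NovitaProviderCard } from '@/config/modelProviders';

// Ids of the Novita models shown by default after this change.
const defaultEnabledIds = NovitaProviderCard.chatModels
  .filter((model) => model.enabled)
  .map((model) => model.id);
// -> ['meta-llama/llama-3.1-8b-instruct', 'meta-llama/llama-3.1-70b-instruct', ...]
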
diff --git a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap
index f6b09c6e1247..4a969451ce8d 100644
--- a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap
+++ b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap
@@ -45,7 +45,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.",
     "displayName": "Nous Hermes 2 - Mixtral 8x7B-DPO ",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
     "maxOutput": 32768,
@@ -65,7 +65,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune",
     "displayName": "Nous Hermes-2 Yi (34B)",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "NousResearch/Nous-Hermes-2-Yi-34B",
     "maxOutput": 4096,
@@ -145,7 +145,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
     "displayName": "Qwen 1.5 Chat (32B)",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "Qwen/Qwen1.5-32B-Chat",
     "maxOutput": 32768,
@@ -165,7 +165,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
     "displayName": "Qwen 1.5 Chat (72B)",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "Qwen/Qwen1.5-72B-Chat",
     "maxOutput": 32768,
@@ -175,7 +175,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.",
     "displayName": "Qwen 1.5 Chat (7B)",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "Qwen/Qwen1.5-7B-Chat",
     "maxOutput": 32768,
@@ -295,7 +295,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.",
     "displayName": "Deepseek Coder Instruct (33B)",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "deepseek-ai/deepseek-coder-33b-instruct",
     "maxOutput": 16384,
@@ -305,7 +305,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese",
     "displayName": "DeepSeek LLM Chat (67B)",
-    "enabled": false,
+    "enabled": true,
     "functionCall": false,
     "id": "deepseek-ai/deepseek-llm-67b-chat",
     "maxOutput": 4096,
@@ -325,7 +325,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
     "displayName": "Gemma Instruct (2B)",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "google/gemma-2b-it",
     "maxOutput": 8192,
@@ -365,7 +365,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters",
     "displayName": "LLaMA-2 Chat (13B)",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "meta-llama/Llama-2-13b-chat-hf",
     "maxOutput": 4096,
@@ -445,7 +445,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.",
     "displayName": "Mixtral-8x22B Instruct v0.1",
-    "enabled": false,
+    "enabled": true,
     "functionCall": false,
     "id": "mistralai/Mixtral-8x22B-Instruct-v0.1",
     "maxOutput": 65536,
@@ -565,7 +565,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = `
   {
     "description": "The Yi series models are large language models trained from scratch by developers at 01.AI",
     "displayName": "01-ai Yi Chat (34B)",
-    "enabled": true,
+    "enabled": false,
     "functionCall": false,
     "id": "zero-one-ai/Yi-34B-Chat",
     "maxOutput": 4096,
diff --git a/src/server/globalConfig/index.ts b/src/server/globalConfig/index.ts
index dac2d804de48..5f31c584ee4c 100644
--- a/src/server/globalConfig/index.ts
+++ b/src/server/globalConfig/index.ts
@@ -4,6 +4,7 @@ import { fileEnv } from '@/config/file';
 import { langfuseEnv } from '@/config/langfuse';
 import { getLLMConfig } from '@/config/llm';
 import {
+  NovitaProviderCard,
   OllamaProviderCard,
   OpenAIProviderCard,
   OpenRouterProviderCard,
@@ -39,7 +40,9 @@ export const getServerGlobalConfig = () => {
     ENABLED_ANTHROPIC,
     ENABLED_MINIMAX,
     ENABLED_MISTRAL,
+
     ENABLED_NOVITA,
+    NOVITA_MODEL_LIST,
 
     ENABLED_QWEN,
     QWEN_MODEL_LIST,
@@ -100,7 +103,14 @@ export const getServerGlobalConfig = () => {
       minimax: { enabled: ENABLED_MINIMAX },
       mistral: { enabled: ENABLED_MISTRAL },
       moonshot: { enabled: ENABLED_MOONSHOT },
-      novita: { enabled: ENABLED_NOVITA },
+      novita: {
+        enabled: ENABLED_NOVITA,
+        enabledModels: extractEnabledModels(NOVITA_MODEL_LIST),
+        serverModelCards: transformToChatModelCards({
+          defaultChatModels: NovitaProviderCard.chatModels,
+          modelString: NOVITA_MODEL_LIST,
+        }),
+      },
       ollama: {
         enabled: ENABLED_OLLAMA,
         fetchOnClient: !OLLAMA_PROXY_URL,
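
For reference, here is a hypothetical end-to-end sketch of what the new Novita wiring is expected to produce. It is not part of the diff: the `@/utils/parseModels` import path and the list grammar are assumed from providers such as Qwen, whose `QWEN_MODEL_LIST` is already routed through `extractEnabledModels` and `transformToChatModelCards` in the same file:

import { NovitaProviderCard } from '@/config/modelProviders';
import { extractEnabledModels, transformToChatModelCards } from '@/utils/parseModels';

// Hypothetical input: drop the defaults, expose one model under a custom name.
const modelString = '-all,+meta-llama/llama-3.1-8b-instruct=Llama 3.1 8B';

const enabledModels = extractEnabledModels(modelString);
// -> ['meta-llama/llama-3.1-8b-instruct']

const serverModelCards = transformToChatModelCards({
  defaultChatModels: NovitaProviderCard.chatModels,
  modelString,
});
// -> a single chat-model card named 'Llama 3.1 8B'; fields such as `tokens`
//    are filled in from the matching default card when one exists.

With this in place, Novita deployments can curate their model menu the same way the other providers do, without a code change.
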