From 0dde3b14e279acef7549471771d56ea26c643da5 Mon Sep 17 00:00:00 2001 From: Zhijie He Date: Sun, 1 Sep 2024 18:45:57 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=84=20style:=20update=20Together=20AI?= =?UTF-8?q?=20model=20list=20(#3713)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 💄 style: update Together AI model list * 💄 style: update model info * 💄 style: update Llama3.1 405B tokens num * 🔨 chore: fix CI --- src/config/modelProviders/togetherai.ts | 139 ++++++++++++++---- .../__snapshots__/index.test.ts.snap | 22 +-- 2 files changed, 123 insertions(+), 38 deletions(-) diff --git a/src/config/modelProviders/togetherai.ts b/src/config/modelProviders/togetherai.ts index c5ec6d47d728..ccb07b002062 100644 --- a/src/config/modelProviders/togetherai.ts +++ b/src/config/modelProviders/togetherai.ts @@ -1,76 +1,161 @@ import { ModelProviderCard } from '@/types/llm'; -// ref https://docs.together.ai/docs/inference-models +// ref https://docs.together.ai/docs/chat-models +// ref https://www.together.ai/pricing const TogetherAI: ModelProviderCard = { chatModels: [ { - displayName: 'Deepseek Coder Instruct (33B)', + displayName: 'Llama 3.1 8B Instruct Turbo', enabled: true, - id: 'deepseek-ai/deepseek-coder-33b-instruct', - tokens: 16_384, + id: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo', + tokens: 131_072, }, { - displayName: 'Phind Code LLaMA v2 (34B)', + displayName: 'Llama 3.1 70B Instruct Turbo', enabled: true, - id: 'Phind/Phind-CodeLlama-34B-v2', - tokens: 16_384, + id: 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo', + tokens: 131_072, }, { - displayName: 'Gemma Instruct (2B)', + displayName: 'Llama 3.1 405B Instruct Turbo', enabled: true, - id: 'google/gemma-2b-it', + id: 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo', + tokens: 8192, + }, + { + displayName: 'Llama 3 8B Instruct Turbo', + id: 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', + tokens: 8192, + }, + { + displayName: 'Llama 3 70B Instruct Turbo', + id: 'meta-llama/Meta-Llama-3-70B-Instruct-Turbo', + tokens: 8192, + }, + { + displayName: 'Llama 3 8B Instruct Lite', + id: 'meta-llama/Meta-Llama-3-8B-Instruct-Lite', + tokens: 8192, + }, + { + displayName: 'Llama 3 70B Instruct Lite', + id: 'meta-llama/Meta-Llama-3-70B-Instruct-Lite', + tokens: 8192, + }, + { + displayName: 'LLaMA-3 Chat (8B)', + id: 'meta-llama/Llama-3-8b-chat-hf', + tokens: 8192, + }, + { + displayName: 'LLaMA-3 Chat (70B)', + id: 'meta-llama/Llama-3-70b-chat-hf', tokens: 8192, }, { displayName: 'LLaMA-2 Chat (13B)', - enabled: true, id: 'meta-llama/Llama-2-13b-chat-hf', tokens: 4096, }, { - displayName: '01-ai Yi Chat (34B)', + displayName: 'Gemma 2 9B', enabled: true, - id: 'zero-one-ai/Yi-34B-Chat', - tokens: 4096, + id: 'google/gemma-2-9b-it', + tokens: 8192, }, { - displayName: 'Mixtral-8x7B Instruct (46.7B)', + displayName: 'Gemma 2 27B', enabled: true, - id: 'mistralai/Mixtral-8x7B-Instruct-v0.1', + id: 'google/gemma-2-27b-it', + tokens: 8192, + }, + { + displayName: 'Gemma Instruct (2B)', + id: 'google/gemma-2b-it', + tokens: 8192, + }, + { + displayName: 'Mistral (7B) Instruct v0.3', + enabled: true, + id: 'mistralai/Mistral-7B-Instruct-v0.3', tokens: 32_768, }, { - displayName: 'Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B)', + displayName: 'Mistral (7B) Instruct v0.2', + id: 'mistralai/Mistral-7B-Instruct-v0.2', + tokens: 32_768, + }, + { + displayName: 'Mistral (7B) Instruct', + id: 'mistralai/Mistral-7B-Instruct-v0.1', + tokens: 8192, + }, + { + displayName: 'Mixtral-8x7B Instruct (46.7B)', enabled: true, - id: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', + id: 'mistralai/Mixtral-8x7B-Instruct-v0.1', tokens: 32_768, }, { - displayName: 'Nous Hermes-2 Yi (34B)', + displayName: 'Mixtral-8x22B Instruct (141B)', enabled: true, - id: 'NousResearch/Nous-Hermes-2-Yi-34B', - tokens: 4096, + id: 'mistralai/Mixtral-8x22B-Instruct-v0.1', + tokens: 65_536, }, { - displayName: 'Qwen 1.5 Chat (7B)', + displayName: 'DeepSeek LLM Chat (67B)', enabled: true, - id: 'Qwen/Qwen1.5-7B-Chat', - tokens: 32_768, + id: 'deepseek-ai/deepseek-llm-67b-chat', + tokens: 4096, }, { - displayName: 'Qwen 1.5 Chat (32B)', + displayName: 'Qwen 2 Instruct (72B)', enabled: true, - id: 'Qwen/Qwen1.5-32B-Chat', + id: 'Qwen/Qwen2-72B-Instruct', tokens: 32_768, }, { displayName: 'Qwen 1.5 Chat (72B)', - enabled: true, id: 'Qwen/Qwen1.5-72B-Chat', tokens: 32_768, }, + { + displayName: 'Qwen 1.5 Chat (110B)', + id: 'Qwen/Qwen1.5-110B-Chat', + tokens: 32_768, + }, + { + displayName: 'DBRX Instruct', + id: 'databricks/dbrx-instruct', + tokens: 32_768, + }, + { + displayName: 'Upstage SOLAR Instruct v1 (11B)', + id: 'upstage/SOLAR-10.7B-Instruct-v1.0', + tokens: 4096, + }, + { + displayName: 'Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B)', + id: 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', + tokens: 32_768, + }, + { + displayName: 'Nous Hermes-2 Yi (34B)', + id: 'NousResearch/Nous-Hermes-2-Yi-34B', + tokens: 4096, + }, + { + displayName: 'MythoMax-L2 (13B)', + id: 'Gryphe/MythoMax-L2-13b', + tokens: 4096, + }, + { + displayName: 'StripedHyena Nous (7B)', + id: 'togethercomputer/StripedHyena-Nous-7B', + tokens: 32_768, + }, ], - checkModel: 'togethercomputer/alpaca-7b', + checkModel: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo', id: 'togetherai', modelList: { showModelFetcher: true }, name: 'Together AI', diff --git a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap index f6b09c6e1247..4a969451ce8d 100644 --- a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap +++ b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap @@ -45,7 +45,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", "displayName": "Nous Hermes 2 - Mixtral 8x7B-DPO ", - "enabled": true, + "enabled": false, "functionCall": false, "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "maxOutput": 32768, @@ -65,7 +65,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune", "displayName": "Nous Hermes-2 Yi (34B)", - "enabled": true, + "enabled": false, "functionCall": false, "id": "NousResearch/Nous-Hermes-2-Yi-34B", "maxOutput": 4096, @@ -145,7 +145,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", "displayName": "Qwen 1.5 Chat (32B)", - "enabled": true, + "enabled": false, "functionCall": false, "id": "Qwen/Qwen1.5-32B-Chat", "maxOutput": 32768, @@ -165,7 +165,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", "displayName": "Qwen 1.5 Chat (72B)", - "enabled": true, + "enabled": false, "functionCall": false, "id": "Qwen/Qwen1.5-72B-Chat", "maxOutput": 32768, @@ -175,7 +175,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", "displayName": "Qwen 1.5 Chat (7B)", - "enabled": true, + "enabled": false, "functionCall": false, "id": "Qwen/Qwen1.5-7B-Chat", "maxOutput": 32768, @@ -295,7 +295,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.", "displayName": "Deepseek Coder Instruct (33B)", - "enabled": true, + "enabled": false, "functionCall": false, "id": "deepseek-ai/deepseek-coder-33b-instruct", "maxOutput": 16384, @@ -305,7 +305,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese", "displayName": "DeepSeek LLM Chat (67B)", - "enabled": false, + "enabled": true, "functionCall": false, "id": "deepseek-ai/deepseek-llm-67b-chat", "maxOutput": 4096, @@ -325,7 +325,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", "displayName": "Gemma Instruct (2B)", - "enabled": true, + "enabled": false, "functionCall": false, "id": "google/gemma-2b-it", "maxOutput": 8192, @@ -365,7 +365,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", "displayName": "LLaMA-2 Chat (13B)", - "enabled": true, + "enabled": false, "functionCall": false, "id": "meta-llama/Llama-2-13b-chat-hf", "maxOutput": 4096, @@ -445,7 +445,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.", "displayName": "Mixtral-8x22B Instruct v0.1", - "enabled": false, + "enabled": true, "functionCall": false, "id": "mistralai/Mixtral-8x22B-Instruct-v0.1", "maxOutput": 65536, @@ -565,7 +565,7 @@ exports[`LobeTogetherAI > models > should get models 1`] = ` { "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", "displayName": "01-ai Yi Chat (34B)", - "enabled": true, + "enabled": false, "functionCall": false, "id": "zero-one-ai/Yi-34B-Chat", "maxOutput": 4096,