From d487093d10f46e6c2fc1f19c728cc51ac0aadf32 Mon Sep 17 00:00:00 2001
From: XinyaoWa
Date: Fri, 30 Aug 2024 12:40:36 +0800
Subject: [PATCH] Add default model in readme for FaqGen and DocSum (#693)

* update default model in readme for DocSum

Signed-off-by: Xinyao Wang
---
 DocSum/README.md                      | 10 ++++++++++
 DocSum/docker/gaudi/README.md         |  5 +++++
 DocSum/docker/xeon/README.md          |  5 +++++
 DocSum/kubernetes/README.md           |  4 +++-
 FaqGen/docker/gaudi/README.md         |  8 +++++++-
 FaqGen/docker/xeon/README.md          |  8 +++++++-
 FaqGen/kubernetes/manifests/README.md | 11 ++++++++++-
 7 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/DocSum/README.md b/DocSum/README.md
index 7d34e83bf..23519da21 100644
--- a/DocSum/README.md
+++ b/DocSum/README.md
@@ -23,6 +23,16 @@ Currently we support two ways of deploying Document Summarization services with
 
 2. Start services using the docker images `built from source`: [Guide](./docker)
 
+### Required Models
+
+We set the default model to "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in "set_env.sh" if you want to use other models.
+
+```
+export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+```
+
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variable
 
 To set up environment variables for deploying Document Summarization services, follow these steps:
diff --git a/DocSum/docker/gaudi/README.md b/DocSum/docker/gaudi/README.md
index 1b0b1a060..cf48ca885 100644
--- a/DocSum/docker/gaudi/README.md
+++ b/DocSum/docker/gaudi/README.md
@@ -64,6 +64,11 @@ Then run the command `docker images`, you will have the following Docker Images:
 
 ## 🚀 Start Microservices and MegaService
 
+### Required Models
+
+We set the default model to "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in the following settings if you want to use other models.
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variables
 
 Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
diff --git a/DocSum/docker/xeon/README.md b/DocSum/docker/xeon/README.md
index d3b1c0463..7a84d47d5 100644
--- a/DocSum/docker/xeon/README.md
+++ b/DocSum/docker/xeon/README.md
@@ -73,6 +73,11 @@ Then run the command `docker images`, you will have the following Docker Images:
 
 ## 🚀 Start Microservices and MegaService
 
+### Required Models
+
+We set the default model to "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in the following Environment Variables settings if you want to use other models.
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variables
 
 Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
diff --git a/DocSum/kubernetes/README.md b/DocSum/kubernetes/README.md
index caf0f5ed6..bd39ee919 100644
--- a/DocSum/kubernetes/README.md
+++ b/DocSum/kubernetes/README.md
@@ -20,7 +20,9 @@ These will be available on Docker Hub soon, simplifying installation.
 This involves deploying the application pipeline custom resource. You can use docsum_xeon.yaml if you have just a Xeon cluster or docsum_gaudi.yaml if you have a Gaudi cluster.
 
 1. Setup Environment variables. These are specific to the user. Skip the proxy settings if you are not operating behind one.
-
+
+   We use "Intel/neural-chat-7b-v3-3" as an example. If you want to use other models, change "LLM_MODEL_ID" in the following settings and change "MODEL_ID" in the manifests yaml file.
+
    ```bash
    export no_proxy=${your_no_proxy}
    export http_proxy=${your_http_proxy}
diff --git a/FaqGen/docker/gaudi/README.md b/FaqGen/docker/gaudi/README.md
index 509cbface..79410bfbb 100644
--- a/FaqGen/docker/gaudi/README.md
+++ b/FaqGen/docker/gaudi/README.md
@@ -64,6 +64,12 @@ Then run the command `docker images`, you will have the following Docker Images:
 
 ## 🚀 Start Microservices and MegaService
 
+### Required Models
+
+We set the default model to "meta-llama/Meta-Llama-3-8B-Instruct". Change "LLM_MODEL_ID" in the following Environment Variables settings if you want to use other models.
+
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variables
 
 Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
@@ -72,7 +78,7 @@ Since the `compose.yaml` will consume some environment variables, you need to se
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
diff --git a/FaqGen/docker/xeon/README.md b/FaqGen/docker/xeon/README.md
index e86e6480b..cbe3a726b 100644
--- a/FaqGen/docker/xeon/README.md
+++ b/FaqGen/docker/xeon/README.md
@@ -63,6 +63,12 @@ Then run the command `docker images`, you will have the following Docker Images:
 
 ## 🚀 Start Microservices and MegaService
 
+### Required Models
+
+We set the default model to "meta-llama/Meta-Llama-3-8B-Instruct". Change "LLM_MODEL_ID" in the following Environment Variables settings if you want to use other models.
+
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variables
 
 Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
@@ -71,7 +77,7 @@ Since the `compose.yaml` will consume some environment variables, you need to se
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
diff --git a/FaqGen/kubernetes/manifests/README.md b/FaqGen/kubernetes/manifests/README.md
index dc0c06b5f..360691b5e 100644
--- a/FaqGen/kubernetes/manifests/README.md
+++ b/FaqGen/kubernetes/manifests/README.md
@@ -3,7 +3,16 @@
 > [NOTE]
 > The following values must be set before you can deploy:
 > HUGGINGFACEHUB_API_TOKEN
-> You can also customize the "MODEL_ID" and "model-volume"
+> You can also customize the "MODEL_ID" and "model-volume".
+
+## Required Models
+We set "meta-llama/Meta-Llama-3-8B-Instruct" as the default model. If you want to use other models, change the "--model-id" arguments in `xeon/faqgen.yaml` or `gaudi/faqgen.yaml`:
+```
+- --model-id
+- 'meta-llama/Meta-Llama-3-8B-Instruct'
+```
+
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
 
 ## Deploy On Xeon
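
For quick reference, the model selection these README additions describe comes down to two exports made before deployment. A minimal sketch (the `${your_hf_api_token}` placeholder follows the READMEs' convention; substitute your own values):

```bash
# Choose the LLM. Defaults documented in the patch:
#   DocSum: Intel/neural-chat-7b-v3-3
#   FaqGen: meta-llama/Meta-Llama-3-8B-Instruct
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

# Needed when the chosen model is gated on Hugging Face (e.g. the Meta-Llama family).
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
```

For the Kubernetes deployments, the equivalent change is the "MODEL_ID" value or the `--model-id` argument in the manifest yaml files, as shown in the FaqGen example above.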