From d487093d10f46e6c2fc1f19c728cc51ac0aadf32 Mon Sep 17 00:00:00 2001
From: XinyaoWa
Date: Fri, 30 Aug 2024 12:40:36 +0800
Subject: [PATCH] Add default model in readme for FaqGen and DocSum (#693)

* update default model in readme for DocSum

Signed-off-by: Xinyao Wang
---
 DocSum/README.md                      | 10 ++++++++++
 DocSum/docker/gaudi/README.md         |  5 +++++
 DocSum/docker/xeon/README.md          |  5 +++++
 DocSum/kubernetes/README.md           |  4 +++-
 FaqGen/docker/gaudi/README.md         |  8 +++++++-
 FaqGen/docker/xeon/README.md          |  8 +++++++-
 FaqGen/kubernetes/manifests/README.md | 11 ++++++++++-
 7 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/DocSum/README.md b/DocSum/README.md
index 7d34e83bf..23519da21 100644
--- a/DocSum/README.md
+++ b/DocSum/README.md
@@ -23,6 +23,16 @@ Currently we support two ways of deploying Document Summarization services with
 
 2. Start services using the docker images `built from source`: [Guide](./docker)
 
+### Required Models
+
+We set the default model to "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in "set_env.sh" if you want to use other models.
+
+```
+export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+```
+
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variable
 
 To set up environment variables for deploying Document Summarization services, follow these steps:
diff --git a/DocSum/docker/gaudi/README.md b/DocSum/docker/gaudi/README.md
index 1b0b1a060..cf48ca885 100644
--- a/DocSum/docker/gaudi/README.md
+++ b/DocSum/docker/gaudi/README.md
@@ -64,6 +64,11 @@ Then run the command `docker images`, you will have the following Docker Images:
 
 ## 🚀 Start Microservices and MegaService
 
+### Required Models
+
+We set the default model to "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in the following settings if you want to use other models.
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variables
 
 Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
diff --git a/DocSum/docker/xeon/README.md b/DocSum/docker/xeon/README.md
index d3b1c0463..7a84d47d5 100644
--- a/DocSum/docker/xeon/README.md
+++ b/DocSum/docker/xeon/README.md
@@ -73,6 +73,11 @@ Then run the command `docker images`, you will have the following Docker Images:
 
 ## 🚀 Start Microservices and MegaService
 
+### Required Models
+
+We set the default model to "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" in the following Environment Variables settings if you want to use other models.
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variables
 
 Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
diff --git a/DocSum/kubernetes/README.md b/DocSum/kubernetes/README.md
index caf0f5ed6..bd39ee919 100644
--- a/DocSum/kubernetes/README.md
+++ b/DocSum/kubernetes/README.md
@@ -20,7 +20,9 @@ These will be available on Docker Hub soon, simplifying installation.
 This involves deploying the application pipeline custom resource. You can use docsum_xeon.yaml if you have just a Xeon cluster or docsum_gaudi.yaml if you have a Gaudi cluster.
 
 1. Setup Environment variables. These are specific to the user. Skip the proxy settings if you are not operating behind one.
-
+
+   We use "Intel/neural-chat-7b-v3-3" as an example. If you want to use other models, change "LLM_MODEL_ID" in the following settings and change "MODEL_ID" in the manifests yaml file.
+
    ```bash
    export no_proxy=${your_no_proxy}
    export http_proxy=${your_http_proxy}
diff --git a/FaqGen/docker/gaudi/README.md b/FaqGen/docker/gaudi/README.md
index 509cbface..79410bfbb 100644
--- a/FaqGen/docker/gaudi/README.md
+++ b/FaqGen/docker/gaudi/README.md
@@ -64,6 +64,12 @@ Then run the command `docker images`, you will have the following Docker Images:
 
 ## 🚀 Start Microservices and MegaService
 
+### Required Models
+
+We set the default model to "meta-llama/Meta-Llama-3-8B-Instruct". Change "LLM_MODEL_ID" in the following Environment Variables settings if you want to use other models.
+
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variables
 
 Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
@@ -72,7 +78,7 @@ Since the `compose.yaml` will consume some environment variables, you need to se
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
diff --git a/FaqGen/docker/xeon/README.md b/FaqGen/docker/xeon/README.md
index e86e6480b..cbe3a726b 100644
--- a/FaqGen/docker/xeon/README.md
+++ b/FaqGen/docker/xeon/README.md
@@ -63,6 +63,12 @@ Then run the command `docker images`, you will have the following Docker Images:
 
 ## 🚀 Start Microservices and MegaService
 
+### Required Models
+
+We set the default model to "meta-llama/Meta-Llama-3-8B-Instruct". Change "LLM_MODEL_ID" in the following Environment Variables settings if you want to use other models.
+
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
+
 ### Setup Environment Variables
 
 Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
@@ -71,7 +77,7 @@ Since the `compose.yaml` will consume some environment variables, you need to se
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export TGI_LLM_ENDPOINT="http://${your_ip}:8008"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
diff --git a/FaqGen/kubernetes/manifests/README.md b/FaqGen/kubernetes/manifests/README.md
index dc0c06b5f..360691b5e 100644
--- a/FaqGen/kubernetes/manifests/README.md
+++ b/FaqGen/kubernetes/manifests/README.md
@@ -3,7 +3,16 @@
 > [NOTE]
 > The following values must be set before you can deploy:
 > HUGGINGFACEHUB_API_TOKEN
-> You can also customize the "MODEL_ID" and "model-volume"
+> You can also customize the "MODEL_ID" and "model-volume".
+
+## Required Models
+We set "meta-llama/Meta-Llama-3-8B-Instruct" as the default model. If you want to use other models, change the "--model-id" arguments in `xeon/faqgen.yaml` or `gaudi/faqgen.yaml`:
+```
+- --model-id
+- 'meta-llama/Meta-Llama-3-8B-Instruct'
+```
+
+If you use gated models, you also need to provide a [Hugging Face token](https://huggingface.co/docs/hub/security-tokens) in the "HUGGINGFACEHUB_API_TOKEN" environment variable.
 
 ## Deploy On Xeon
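
For quick reference, the model selection these README additions describe comes down to two exports made before deployment. A minimal sketch (the `${your_hf_api_token}` placeholder follows the READMEs' convention; substitute your own values):

```bash
# Choose the LLM. Defaults documented in the patch:
#   DocSum: Intel/neural-chat-7b-v3-3
#   FaqGen: meta-llama/Meta-Llama-3-8B-Instruct
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"

# Needed when the chosen model is gated on Hugging Face (e.g. the Meta-Llama family).
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
```

For the Kubernetes deployments, the equivalent change is the "MODEL_ID" value or the `--model-id` argument in the manifest yaml files, as shown in the FaqGen example above.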