From 910957759f2513d2c469e08b83efd0b139c1eac6 Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Thu, 4 Jul 2024 13:49:27 +0800
Subject: [PATCH 01/13] update text-generation-inference:2.1.0

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 comps/llms/text-generation/tgi/README.md      | 18 +++++++++---------
 .../llms/text-generation/tgi/build_docker.sh  |  9 ---------
 .../tgi/docker_compose_llm.yaml               |  2 +-
 tests/test_llms_text-generation_tgi.sh        | 19 +++++++++++++------
 4 files changed, 23 insertions(+), 25 deletions(-)
 delete mode 100644 comps/llms/text-generation/tgi/build_docker.sh

diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md
index 1a2ef8ddc..44a540313 100644
--- a/comps/llms/text-generation/tgi/README.md
+++ b/comps/llms/text-generation/tgi/README.md
@@ -19,7 +19,7 @@ export HF_TOKEN=${your_hf_api_token}
 export LANGCHAIN_TRACING_V2=true
 export LANGCHAIN_API_KEY=${your_langchain_api_key}
 export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms"
-docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
 ```
 
 ## 1.3 Verify the TGI Service
@@ -114,11 +114,11 @@ curl http://${your_ip}:9000/v1/chat/completions \
 
 ## 4. Validated Model
 
-| Model                     | TGI-Gaudi |
-| ------------------------- | --------- |
-| Intel/neural-chat-7b-v3-3 | ✓         |
-| Llama-2-7b-chat-hf        | ✓         |
-| Llama-2-70b-chat-hf       | ✓         |
-| Meta-Llama-3-8B-Instruct  | ✓         |
-| Meta-Llama-3-70B-Instruct | ✓         |
-| Phi-3                     | x         |
+| Model                     | TGI  |
+| ------------------------- |------|
+| Intel/neural-chat-7b-v3-3 | ✓    |
+| Llama-2-7b-chat-hf        | ✓    |
+| Llama-2-70b-chat-hf       | ✓    |
+| Meta-Llama-3-8B-Instruct  | ✓    |
+| Meta-Llama-3-70B-Instruct | ✓    |
+| Phi-3                     | x    |
diff --git a/comps/llms/text-generation/tgi/build_docker.sh b/comps/llms/text-generation/tgi/build_docker.sh
deleted file mode 100644
index 80c00c9fc..000000000
--- a/comps/llms/text-generation/tgi/build_docker.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-git clone https://github.com/huggingface/tgi-gaudi.git
-cd ./tgi-gaudi/
-docker build -t ghcr.io/huggingface/tgi-gaudi:1.2.1 . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/text-generation/tgi/docker_compose_llm.yaml
index e52475b30..c1ab98dcc 100644
--- a/comps/llms/text-generation/tgi/docker_compose_llm.yaml
+++ b/comps/llms/text-generation/tgi/docker_compose_llm.yaml
@@ -5,7 +5,7 @@ version: "3.8"
 
 services:
   tgi_service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:2.1.0
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index e08a885a9..bb3e33c37 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -14,10 +14,10 @@ function build_docker_images() {
 
 function start_service() {
     tgi_endpoint_port=5004
-    export your_hf_llm_model="Intel/neural-chat-7b-v3-3"
+    export your_hf_llm_model=$1
     # Remember to set HF_TOKEN before invoking this test!
     export HF_TOKEN=${HF_TOKEN}
-    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
     export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}"
 
     tei_service_port=5005
@@ -55,13 +55,20 @@ function stop_docker() {
 function main() {
 
     stop_docker
-
     build_docker_images
-    start_service
 
-    validate_microservice
+    llm_models=(
+    Intel/neural-chat-7b-v3-3
+    Llama-2-7b-chat-hf
+    Meta-Llama-3-8B-Instruct
+    Phi-3
+    )
+    for model in "${llm_models[@]}"; do
+      start_service "${model}"
+      validate_microservice
+      stop_docker
+    done
 
-    stop_docker
     echo y | docker system prune
 
 }

From 6d73a9f2b23002d56156c0de2f5eee53ef11229b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 4 Jul 2024 05:52:08 +0000
Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 comps/llms/text-generation/tgi/README.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md
index 44a540313..c82e43e0e 100644
--- a/comps/llms/text-generation/tgi/README.md
+++ b/comps/llms/text-generation/tgi/README.md
@@ -114,11 +114,11 @@ curl http://${your_ip}:9000/v1/chat/completions \
 
 ## 4. Validated Model
 
-| Model                     | TGI  |
-| ------------------------- |------|
-| Intel/neural-chat-7b-v3-3 | ✓    |
-| Llama-2-7b-chat-hf        | ✓    |
-| Llama-2-70b-chat-hf       | ✓    |
-| Meta-Llama-3-8B-Instruct  | ✓    |
-| Meta-Llama-3-70B-Instruct | ✓    |
-| Phi-3                     | x    |
+| Model                     | TGI |
+| ------------------------- | --- |
+| Intel/neural-chat-7b-v3-3 | ✓   |
+| Llama-2-7b-chat-hf        | ✓   |
+| Llama-2-70b-chat-hf       | ✓   |
+| Meta-Llama-3-8B-Instruct  | ✓   |
+| Meta-Llama-3-70B-Instruct | ✓   |
+| Phi-3                     | x   |

From dff0a9ac777851052d4501d8d1b400e9e5d800a7 Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Thu, 4 Jul 2024 15:48:05 +0800
Subject: [PATCH 03/13] add overall system prune

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 .github/workflows/microservice-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/microservice-test.yml
index 635025332..2324e2eff 100644
--- a/.github/workflows/microservice-test.yml
+++ b/.github/workflows/microservice-test.yml
@@ -56,7 +56,7 @@ jobs:
         run: |
           cid=$(docker ps -aq --filter "name=test-comps-*")
           if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
-          echo y | docker system prune
+          echo y | docker system prune --all
 
       - name: Publish pipeline artifact
         if: ${{ !cancelled() }}

From a6cd5b2300a64b5fdbb0c4c04ca71c54baa2d00e Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Fri, 5 Jul 2024 09:26:36 +0800
Subject: [PATCH 04/13] update model name

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 .github/workflows/microservice-test.yml | 2 +-
 tests/test_llms_text-generation_tgi.sh  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/microservice-test.yml
index 2324e2eff..d8000b93d 100644
--- a/.github/workflows/microservice-test.yml
+++ b/.github/workflows/microservice-test.yml
@@ -49,7 +49,7 @@ jobs:
           cd tests
           service=$(echo $service_path | tr '/' '_')
           echo "service=${service}" >> $GITHUB_ENV
-          if [ -f test_${service}.sh ]; then timeout 10m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi
+          if [ -f test_${service}.sh ]; then timeout 30m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi
 
       - name: Clean up container
         if: cancelled() || failure()
diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index bb3e33c37..d136b47f4 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -59,9 +59,9 @@ function main() {
 
     llm_models=(
     Intel/neural-chat-7b-v3-3
-    Llama-2-7b-chat-hf
-    Meta-Llama-3-8B-Instruct
-    Phi-3
+    meta-llama/Llama-2-7b-chat-hf
+    meta-llama/Meta-Llama-3-8B-Instruct
+    microsoft/Phi-3-mini-4k-instruct
     )
     for model in "${llm_models[@]}"; do
       start_service "${model}"

From 6a42265fcc90cf89939f2f310e7c1fba9659106d Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Fri, 5 Jul 2024 09:27:24 +0800
Subject: [PATCH 05/13] fix test scripts name

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 ...st_llm_summarization_tgi.sh => test_llms_summarization_tgi.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{test_llm_summarization_tgi.sh => test_llms_summarization_tgi.sh} (100%)

diff --git a/tests/test_llm_summarization_tgi.sh b/tests/test_llms_summarization_tgi.sh
similarity index 100%
rename from tests/test_llm_summarization_tgi.sh
rename to tests/test_llms_summarization_tgi.sh

From f7bc29e3937e16acdb39025a46f590d0e1f06de7 Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Fri, 5 Jul 2024 09:54:11 +0800
Subject: [PATCH 06/13] skip one model

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 tests/test_llms_text-generation_tgi.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index d136b47f4..fb33b7215 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -59,7 +59,7 @@ function main() {
 
     llm_models=(
     Intel/neural-chat-7b-v3-3
-    meta-llama/Llama-2-7b-chat-hf
+    # meta-llama/Llama-2-7b-chat-hf
     meta-llama/Meta-Llama-3-8B-Instruct
     microsoft/Phi-3-mini-4k-instruct
     )

From f81284d2e77d33e1b2346037602d56dcf5363021 Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Fri, 5 Jul 2024 11:01:40 +0800
Subject: [PATCH 07/13] re-enable Llama-2-7b-chat-hf test after updating HF token

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 tests/test_llms_text-generation_tgi.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index fb33b7215..d136b47f4 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -59,7 +59,7 @@ function main() {
 
     llm_models=(
     Intel/neural-chat-7b-v3-3
-    # meta-llama/Llama-2-7b-chat-hf
+    meta-llama/Llama-2-7b-chat-hf
     meta-llama/Meta-Llama-3-8B-Instruct
     microsoft/Phi-3-mini-4k-instruct
     )

From 6edb92977da17155aad74e5755a9aa689fe79855 Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Fri, 5 Jul 2024 17:03:37 +0800
Subject: [PATCH 08/13] for test: temporarily run only Phi-3-mini-4k-instruct

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 tests/test_llms_text-generation_tgi.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index d136b47f4..fbc25d093 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -58,9 +58,9 @@ function main() {
     build_docker_images
 
     llm_models=(
-    Intel/neural-chat-7b-v3-3
-    meta-llama/Llama-2-7b-chat-hf
-    meta-llama/Meta-Llama-3-8B-Instruct
+#    Intel/neural-chat-7b-v3-3
+#    meta-llama/Llama-2-7b-chat-hf
+#    meta-llama/Meta-Llama-3-8B-Instruct
     microsoft/Phi-3-mini-4k-instruct
     )
     for model in "${llm_models[@]}"; do

From 55ed322cfca9a6006bebb8bdd3b4e1818a070f9e Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Tue, 9 Jul 2024 10:26:56 +0800
Subject: [PATCH 09/13] bug fix: set TGI --max-input-tokens and --max-total-tokens in test

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 tests/test_llms_text-generation_tgi.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index fbc25d093..8f26558cf 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -17,7 +17,7 @@ function start_service() {
     export your_hf_llm_model=$1
     # Remember to set HF_TOKEN before invoking this test!
     export HF_TOKEN=${HF_TOKEN}
-    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
+    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048
     export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}"
 
     tei_service_port=5005

From 68c29e225ea50a9bb39ec9c5dc2a8109b1caac72 Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Tue, 9 Jul 2024 14:03:26 +0800
Subject: [PATCH 10/13] add more model for test

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 tests/test_llms_text-generation_tgi.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index 8f26558cf..6ce0563e2 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -58,9 +58,9 @@ function main() {
     build_docker_images
 
     llm_models=(
-#    Intel/neural-chat-7b-v3-3
-#    meta-llama/Llama-2-7b-chat-hf
-#    meta-llama/Meta-Llama-3-8B-Instruct
+    Intel/neural-chat-7b-v3-3
+    meta-llama/Llama-2-7b-chat-hf
+    meta-llama/Meta-Llama-3-8B-Instruct
     microsoft/Phi-3-mini-4k-instruct
     )
     for model in "${llm_models[@]}"; do

From 1924f7f97fe909fb613fb99412a1760405c68354 Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Tue, 9 Jul 2024 14:04:41 +0800
Subject: [PATCH 11/13] update readme

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 comps/llms/text-generation/tgi/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md
index c82e43e0e..6c9607ca9 100644
--- a/comps/llms/text-generation/tgi/README.md
+++ b/comps/llms/text-generation/tgi/README.md
@@ -121,4 +121,4 @@ curl http://${your_ip}:9000/v1/chat/completions \
 | Llama-2-70b-chat-hf       | ✓   |
 | Meta-Llama-3-8B-Instruct  | ✓   |
 | Meta-Llama-3-70B-Instruct | ✓   |
-| Phi-3                     | x   |
+| Phi-3                     | ✓   |

From aa56a5e5b779c4b9da33dfd7619de7320891a78a Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Tue, 9 Jul 2024 16:58:08 +0800
Subject: [PATCH 12/13] test after fixing token: pass HF_TOKEN to TGI container, run Llama models only

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 tests/test_llms_text-generation_tgi.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index 6ce0563e2..1dc778c3f 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -17,7 +17,7 @@ function start_service() {
     export your_hf_llm_model=$1
     # Remember to set HF_TOKEN before invoking this test!
     export HF_TOKEN=${HF_TOKEN}
-    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048
+    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048
     export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}"
 
     tei_service_port=5005
@@ -58,10 +58,10 @@ function main() {
     build_docker_images
 
     llm_models=(
-    Intel/neural-chat-7b-v3-3
+    # Intel/neural-chat-7b-v3-3
     meta-llama/Llama-2-7b-chat-hf
     meta-llama/Meta-Llama-3-8B-Instruct
-    microsoft/Phi-3-mini-4k-instruct
+    # microsoft/Phi-3-mini-4k-instruct
     )
     for model in "${llm_models[@]}"; do
       start_service "${model}"

From fdc1612d7aeeea30fe741c10e76425b0a3d94063 Mon Sep 17 00:00:00 2001
From: chensuyue <suyue.chen@intel.com>
Date: Tue, 9 Jul 2024 22:27:18 +0800
Subject: [PATCH 13/13] add model for test

Signed-off-by: chensuyue <suyue.chen@intel.com>
---
 tests/test_llms_text-generation_tgi.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index 153974617..6b6c17c19 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -58,10 +58,10 @@ function main() {
     build_docker_images
 
     llm_models=(
-    # Intel/neural-chat-7b-v3-3
+    Intel/neural-chat-7b-v3-3
     meta-llama/Llama-2-7b-chat-hf
     meta-llama/Meta-Llama-3-8B-Instruct
-    # microsoft/Phi-3-mini-4k-instruct
+    microsoft/Phi-3-mini-4k-instruct
     )
     for model in "${llm_models[@]}"; do
       start_service "${model}"