From f236949f62e26695ff0f6e7d4fbce8441fb2d8e4 Mon Sep 17 00:00:00 2001
From: "chen, suyue"
Date: Tue, 9 Jul 2024 22:28:18 +0800
Subject: [PATCH] update tgi with text-generation-inference:2.1.0 (#273)

Signed-off-by: chensuyue
---
 .github/workflows/microservice-test.yml      |  4 ++--
 comps/llms/text-generation/tgi/README.md     | 18 +++++++++---------
 .../llms/text-generation/tgi/build_docker.sh |  9 ---------
 .../tgi/docker_compose_llm.yaml              |  2 +-
 tests/test_llms_text-generation_tgi.sh       | 19 +++++++++++++------
 5 files changed, 25 insertions(+), 27 deletions(-)
 delete mode 100644 comps/llms/text-generation/tgi/build_docker.sh

diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/microservice-test.yml
index 635025332..d8000b93d 100644
--- a/.github/workflows/microservice-test.yml
+++ b/.github/workflows/microservice-test.yml
@@ -49,14 +49,14 @@ jobs:
           cd tests
           service=$(echo $service_path | tr '/' '_')
           echo "service=${service}" >> $GITHUB_ENV
-          if [ -f test_${service}.sh ]; then timeout 10m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi
+          if [ -f test_${service}.sh ]; then timeout 30m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi
 
       - name: Clean up container
         if: cancelled() || failure()
         run: |
           cid=$(docker ps -aq --filter "name=test-comps-*")
           if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
-          echo y | docker system prune
+          echo y | docker system prune --all
 
       - name: Publish pipeline artifact
         if: ${{ !cancelled() }}
diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md
index 1a2ef8ddc..6c9607ca9 100644
--- a/comps/llms/text-generation/tgi/README.md
+++ b/comps/llms/text-generation/tgi/README.md
@@ -19,7 +19,7 @@ export HF_TOKEN=${your_hf_api_token}
 export LANGCHAIN_TRACING_V2=true
 export LANGCHAIN_API_KEY=${your_langchain_api_key}
 export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms"
-docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
 ```
 
 ## 1.3 Verify the TGI Service
@@ -114,11 +114,11 @@ curl http://${your_ip}:9000/v1/chat/completions \
 
 ## 4. Validated Model
 
-| Model                     | TGI-Gaudi |
-| ------------------------- | --------- |
-| Intel/neural-chat-7b-v3-3 | ✓         |
-| Llama-2-7b-chat-hf        | ✓         |
-| Llama-2-70b-chat-hf       | ✓         |
-| Meta-Llama-3-8B-Instruct  | ✓         |
-| Meta-Llama-3-70B-Instruct | ✓         |
-| Phi-3                     | x         |
+| Model                     | TGI |
+| ------------------------- | --- |
+| Intel/neural-chat-7b-v3-3 | ✓   |
+| Llama-2-7b-chat-hf        | ✓   |
+| Llama-2-70b-chat-hf       | ✓   |
+| Meta-Llama-3-8B-Instruct  | ✓   |
+| Meta-Llama-3-70B-Instruct | ✓   |
+| Phi-3                     | ✓   |
diff --git a/comps/llms/text-generation/tgi/build_docker.sh b/comps/llms/text-generation/tgi/build_docker.sh
deleted file mode 100644
index 80c00c9fc..000000000
--- a/comps/llms/text-generation/tgi/build_docker.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-git clone https://github.com/huggingface/tgi-gaudi.git
-cd ./tgi-gaudi/
-docker build -t ghcr.io/huggingface/tgi-gaudi:1.2.1 . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/text-generation/tgi/docker_compose_llm.yaml
index e52475b30..c1ab98dcc 100644
--- a/comps/llms/text-generation/tgi/docker_compose_llm.yaml
+++ b/comps/llms/text-generation/tgi/docker_compose_llm.yaml
@@ -5,7 +5,7 @@ version: "3.8"
 
 services:
   tgi_service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:2.1.0
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
index 6e62098aa..6b6c17c19 100644
--- a/tests/test_llms_text-generation_tgi.sh
+++ b/tests/test_llms_text-generation_tgi.sh
@@ -14,10 +14,10 @@ function build_docker_images() {
 
 function start_service() {
     tgi_endpoint_port=5004
-    export your_hf_llm_model="Intel/neural-chat-7b-v3-3"
+    export your_hf_llm_model=$1
     # Remember to set HF_TOKEN before invoking this test!
    export HF_TOKEN=${HF_TOKEN}
-    docker run -d --name="test-comps-llm-tgi-endpoint" -e https_proxy -e http_proxy -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048
     export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}"
 
     tei_service_port=5005
@@ -55,13 +55,20 @@ function stop_docker() {
 
 function main() {
 
     stop_docker
-    build_docker_images
-    start_service
-    validate_microservice
+    llm_models=(
+    Intel/neural-chat-7b-v3-3
+    meta-llama/Llama-2-7b-chat-hf
+    meta-llama/Meta-Llama-3-8B-Instruct
+    microsoft/Phi-3-mini-4k-instruct
+    )
+    for model in "${llm_models[@]}"; do
+        start_service "${model}"
+        validate_microservice
+        stop_docker
+    done
 
-    stop_docker
     echo y | docker system prune
 
 }
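
After this upgrade the TGI endpoint can be smoke-tested against TGI's standard
/generate route. A minimal sketch, assuming the container from
docker_compose_llm.yaml is up on host port 8008 and the model has finished
loading (first startup can take a while as weights download into ./data):

    # Request a short completion; the response is JSON with a
    # "generated_text" field.
    curl http://localhost:8008/generate \
        -X POST \
        -H 'Content-Type: application/json' \
        -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}'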
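The revised test script loops over four models, two of which (the meta-llama
checkpoints) are gated on Hugging Face. A sketch of a local run, assuming the
token has already been granted access to those repos (the script only
re-exports ${HF_TOKEN}; it does not obtain one):

    # <your_hf_api_token> is a placeholder, as in the README above.
    export HF_TOKEN=<your_hf_api_token>
    cd tests && bash test_llms_text-generation_tgi.sh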
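On the CI side, `docker system prune --all` removes all unused images rather
than only dangling ones, which matters on shared runners where serving images
run to gigabytes; together with the 10m -> 30m timeout bump it accommodates
the serial four-model loop. The `echo y |` answers the confirmation prompt;
the non-interactive equivalent is:

    docker system prune --all --force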