update tgi with text-generation-inference:2.1.0 (#273)

Signed-off-by: chensuyue <[email protected]>
opea-project · Jul 9, 2024 · f236949
1 parent 61795fd
commit f236949
Show file tree

Hide file tree

Showing 5 changed files with 25 additions and 27 deletions.
diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/microservice-test.yml
@@ -49,14 +49,14 @@ jobs:
           cd tests
           service=$(echo $service_path | tr '/' '_')
           echo "service=${service}" >> $GITHUB_ENV
-          if [ -f test_${service}.sh ]; then timeout 10m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi
+          if [ -f test_${service}.sh ]; then timeout 30m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi
 
       - name: Clean up container
         if: cancelled() || failure()
         run: |
           cid=$(docker ps -aq --filter "name=test-comps-*")
           if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
-          echo y | docker system prune
+          echo y | docker system prune --all
 
       - name: Publish pipeline artifact
         if: ${{ !cancelled() }}

diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md
@@ -19,7 +19,7 @@ export HF_TOKEN=${your_hf_api_token}
 export LANGCHAIN_TRACING_V2=true
 export LANGCHAIN_API_KEY=${your_langchain_api_key}
 export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms"
-docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
 ```
 
 ## 1.3 Verify the TGI Service
@@ -114,11 +114,11 @@ curl http://${your_ip}:9000/v1/chat/completions \
 
 ## 4. Validated Model
 
-| Model                     | TGI-Gaudi |
-| ------------------------- | --------- |
-| Intel/neural-chat-7b-v3-3 | ✓         |
-| Llama-2-7b-chat-hf        | ✓         |
-| Llama-2-70b-chat-hf       | ✓         |
-| Meta-Llama-3-8B-Instruct  | ✓         |
-| Meta-Llama-3-70B-Instruct | ✓         |
-| Phi-3                     | x         |
+| Model                     | TGI |
+| ------------------------- | --- |
+| Intel/neural-chat-7b-v3-3 | ✓   |
+| Llama-2-7b-chat-hf        | ✓   |
+| Llama-2-70b-chat-hf       | ✓   |
+| Meta-Llama-3-8B-Instruct  | ✓   |
+| Meta-Llama-3-70B-Instruct | ✓   |
+| Phi-3                     | ✓   |
diff --git a/comps/llms/text-generation/tgi/build_docker.sh b/comps/llms/text-generation/tgi/build_docker.sh
diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/text-generation/tgi/docker_compose_llm.yaml
@@ -5,7 +5,7 @@ version: "3.8"
 
 services:
   tgi_service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:2.1.0
     container_name: tgi-service
     ports:
       - "8008:80"

diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh
@@ -14,10 +14,10 @@ function build_docker_images() {
 
 function start_service() {
     tgi_endpoint_port=5004
-    export your_hf_llm_model="Intel/neural-chat-7b-v3-3"
+    export your_hf_llm_model=$1
     # Remember to set HF_TOKEN before invoking this test!
     export HF_TOKEN=${HF_TOKEN}
-    docker run -d --name="test-comps-llm-tgi-endpoint" -e https_proxy -e http_proxy -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048
     export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}"
 
     tei_service_port=5005
@@ -55,13 +55,20 @@ function stop_docker() {
 function main() {
 
     stop_docker
-
     build_docker_images
-    start_service
 
-    validate_microservice
+    llm_models=(
+    Intel/neural-chat-7b-v3-3
+    meta-llama/Llama-2-7b-chat-hf
+    meta-llama/Meta-Llama-3-8B-Instruct
+    microsoft/Phi-3-mini-4k-instruct
+    )
+    for model in "${llm_models[@]}"; do
+      start_service "${model}"
+      validate_microservice
+      stop_docker
+    done
 
-    stop_docker
     echo y | docker system prune
 
 }