update tgi with text-generation-inference:2.1.0 #273

Merged · 24 commits · Jul 9, 2024
Commits (24)
9109577 update text-generation-inference:2.1.0 (chensuyue, Jul 4, 2024)
6d73a9f [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 4, 2024)
dff0a9a add overall system prune (chensuyue, Jul 4, 2024)
634b583 Merge branch 'suyue/tgi' of https://github.com/opea-project/GenAIComp… (chensuyue, Jul 4, 2024)
5a61adb Merge branch 'main' into suyue/tgi (chensuyue, Jul 4, 2024)
a6cd5b2 update model name (chensuyue, Jul 5, 2024)
6a42265 fix test scripts name (chensuyue, Jul 5, 2024)
3166db9 Merge branch 'suyue/tgi' of https://github.com/opea-project/GenAIComp… (chensuyue, Jul 5, 2024)
5684988 Merge branch 'main' into suyue/tgi (chensuyue, Jul 5, 2024)
f7bc29e skip one model (chensuyue, Jul 5, 2024)
8d11947 Merge branch 'suyue/tgi' of https://github.com/opea-project/GenAIComp… (chensuyue, Jul 5, 2024)
f81284d retest after update hf token (chensuyue, Jul 5, 2024)
4b6be62 Merge branch 'main' into suyue/tgi (chensuyue, Jul 5, 2024)
6edb929 for test (chensuyue, Jul 5, 2024)
5a41ab4 Merge branch 'suyue/tgi' of https://github.com/opea-project/GenAIComp… (chensuyue, Jul 5, 2024)
55ed322 bug fix (chensuyue, Jul 9, 2024)
68c29e2 add more model for test (chensuyue, Jul 9, 2024)
d8e66da Merge branch 'main' into suyue/tgi (chensuyue, Jul 9, 2024)
1924f7f update readme (chensuyue, Jul 9, 2024)
73e201d Merge branch 'suyue/tgi' of https://github.com/opea-project/GenAIComp… (chensuyue, Jul 9, 2024)
310335e Merge branch 'main' into suyue/tgi (chensuyue, Jul 9, 2024)
aa56a5e test after fix token (chensuyue, Jul 9, 2024)
1ac20f0 Merge branch 'suyue/tgi' of https://github.com/opea-project/GenAIComp… (chensuyue, Jul 9, 2024)
fdc1612 add model for test (chensuyue, Jul 9, 2024)
18 changes: 9 additions & 9 deletions comps/llms/text-generation/tgi/README.md
@@ -19,7 +19,7 @@
 export HF_TOKEN=${your_hf_api_token}
 export LANGCHAIN_TRACING_V2=true
 export LANGCHAIN_API_KEY=${your_langchain_api_key}
 export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms"
-docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
 ```

## 1.3 Verify the TGI Service
@@ -114,11 +114,11 @@ curl http://${your_ip}:9000/v1/chat/completions \

 ## 4. Validated Model

-| Model                     | TGI-Gaudi |
-| ------------------------- | --------- |
-| Intel/neural-chat-7b-v3-3 | ✓         |
-| Llama-2-7b-chat-hf        | ✓         |
-| Llama-2-70b-chat-hf       | ✓         |
-| Meta-Llama-3-8B-Instruct  | ✓         |
-| Meta-Llama-3-70B-Instruct | ✓         |
-| Phi-3                     | x         |
+| Model                     | TGI |
+| ------------------------- | --- |
+| Intel/neural-chat-7b-v3-3 | ✓   |
+| Llama-2-7b-chat-hf        | ✓   |
+| Llama-2-70b-chat-hf       | ✓   |
+| Meta-Llama-3-8B-Instruct  | ✓   |
+| Meta-Llama-3-70B-Instruct | ✓   |
+| Phi-3                     | x   |
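Once a container from the updated image is up, the README's docker run command above can be sanity-checked against TGI's /generate endpoint. A minimal sketch — the `tgi_generate_payload` helper and the example prompt are illustrative, not part of this PR:

```shell
# Build the JSON body expected by TGI's /generate endpoint.
tgi_generate_payload() {
    local prompt=$1 max_new_tokens=${2:-64}
    printf '{"inputs":"%s","parameters":{"max_new_tokens":%d}}' \
        "$prompt" "$max_new_tokens"
}

# Print the body for a short test prompt.
tgi_generate_payload "What is Deep Learning?" 17
```

The resulting body can then be POSTed to the service started above, e.g. `curl http://localhost:8008/generate -X POST -H 'Content-Type: application/json' -d "$(tgi_generate_payload 'What is Deep Learning?' 17)"`.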
9 changes: 0 additions & 9 deletions comps/llms/text-generation/tgi/build_docker.sh

This file was deleted.

2 changes: 1 addition & 1 deletion comps/llms/text-generation/tgi/docker_compose_llm.yaml
@@ -5,7 +5,7 @@ version: "3.8"

 services:
   tgi_service:
-    image: ghcr.io/huggingface/text-generation-inference:1.4
+    image: ghcr.io/huggingface/text-generation-inference:2.1.0
     container_name: tgi-service
     ports:
       - "8008:80"
19 changes: 13 additions & 6 deletions tests/test_llms_text-generation_tgi.sh
@@ -14,10 +14,10 @@ function build_docker_images() {

 function start_service() {
     tgi_endpoint_port=5004
-    export your_hf_llm_model="Intel/neural-chat-7b-v3-3"
+    export your_hf_llm_model=$1
     # Remember to set HF_TOKEN before invoking this test!
     export HF_TOKEN=${HF_TOKEN}
-    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model}
+    docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model}
     export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}"

     tei_service_port=5005

@@ -55,13 +55,20 @@ function stop_docker() {
 function main() {

     stop_docker

     build_docker_images
-    start_service
-
-    validate_microservice
+    llm_models=(
+        Intel/neural-chat-7b-v3-3
+        Llama-2-7b-chat-hf
+        Meta-Llama-3-8B-Instruct
+        Phi-3
+    )
+    for model in "${llm_models[@]}"; do
+        start_service "${model}"
+        validate_microservice
+        stop_docker
+    done

-    stop_docker
     echo y | docker system prune

 }
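Since the test now starts a fresh TGI container for each model in the loop, the first validate_microservice call can race model loading. One way to harden the loop is to poll the endpoint until it answers before validating — a hedged sketch, where `wait_until_ready` is a hypothetical helper not in this PR and the `/health` route is assumed from TGI's HTTP API:

```shell
# Poll a URL until it responds successfully or the attempts run out.
# Returns 0 once the endpoint answers, 1 if it never does.
wait_until_ready() {
    local url=$1 attempts=${2:-30} delay=${3:-2}
    local i
    for ((i = 1; i <= attempts; i++)); do
        if curl -sf "$url" > /dev/null 2>&1; then
            return 0
        fi
        sleep "$delay"
    done
    return 1
}

# In main(), between start_service and validate_microservice:
#   wait_until_ready "http://${ip_address}:${tgi_endpoint_port}/health" 60 5 || exit 1
```

Larger models need more attempts, so the caller tunes the attempt count and delay rather than the helper hard-coding a timeout.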