From 910957759f2513d2c469e08b83efd0b139c1eac6 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Thu, 4 Jul 2024 13:49:27 +0800 Subject: [PATCH 01/13] update text-generation-inference:2.1.0 Signed-off-by: chensuyue --- comps/llms/text-generation/tgi/README.md | 18 +++++++++--------- .../llms/text-generation/tgi/build_docker.sh | 9 --------- .../tgi/docker_compose_llm.yaml | 2 +- tests/test_llms_text-generation_tgi.sh | 19 +++++++++++++------ 4 files changed, 23 insertions(+), 25 deletions(-) delete mode 100644 comps/llms/text-generation/tgi/build_docker.sh diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md index 1a2ef8ddc..44a540313 100644 --- a/comps/llms/text-generation/tgi/README.md +++ b/comps/llms/text-generation/tgi/README.md @@ -19,7 +19,7 @@ export HF_TOKEN=${your_hf_api_token} export LANGCHAIN_TRACING_V2=true export LANGCHAIN_API_KEY=${your_langchain_api_key} export LANGCHAIN_PROJECT="opea/gen-ai-comps:llms" -docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} +docker run -p 8008:80 -v ./data:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} ``` ## 1.3 Verify the TGI Service @@ -114,11 +114,11 @@ curl http://${your_ip}:9000/v1/chat/completions \ ## 4. Validated Model -| Model | TGI-Gaudi | -| ------------------------- | --------- | -| Intel/neural-chat-7b-v3-3 | ✓ | -| Llama-2-7b-chat-hf | ✓ | -| Llama-2-70b-chat-hf | ✓ | -| Meta-Llama-3-8B-Instruct | ✓ | -| Meta-Llama-3-70B-Instruct | ✓ | -| Phi-3 | x | +| Model | TGI | +| ------------------------- |------| +| Intel/neural-chat-7b-v3-3 | ✓ | +| Llama-2-7b-chat-hf | ✓ | +| Llama-2-70b-chat-hf | ✓ | +| Meta-Llama-3-8B-Instruct | ✓ | +| Meta-Llama-3-70B-Instruct | ✓ | +| Phi-3 | x | diff --git a/comps/llms/text-generation/tgi/build_docker.sh b/comps/llms/text-generation/tgi/build_docker.sh deleted file mode 100644 index 80c00c9fc..000000000 --- a/comps/llms/text-generation/tgi/build_docker.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -git clone https://github.com/huggingface/tgi-gaudi.git -cd ./tgi-gaudi/ -docker build -t ghcr.io/huggingface/tgi-gaudi:1.2.1 . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy diff --git a/comps/llms/text-generation/tgi/docker_compose_llm.yaml b/comps/llms/text-generation/tgi/docker_compose_llm.yaml index e52475b30..c1ab98dcc 100644 --- a/comps/llms/text-generation/tgi/docker_compose_llm.yaml +++ b/comps/llms/text-generation/tgi/docker_compose_llm.yaml @@ -5,7 +5,7 @@ version: "3.8" services: tgi_service: - image: ghcr.io/huggingface/text-generation-inference:1.4 + image: ghcr.io/huggingface/text-generation-inference:2.1.0 container_name: tgi-service ports: - "8008:80" diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index e08a885a9..bb3e33c37 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -14,10 +14,10 @@ function build_docker_images() { function start_service() { tgi_endpoint_port=5004 - export your_hf_llm_model="Intel/neural-chat-7b-v3-3" + export your_hf_llm_model=$1 # Remember to set HF_TOKEN before invoking this test! export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:1.4 --model-id ${your_hf_llm_model} + docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" tei_service_port=5005 @@ -55,13 +55,20 @@ function stop_docker() { function main() { stop_docker - build_docker_images - start_service - validate_microservice + llm_models=( + Intel/neural-chat-7b-v3-3 + Llama-2-7b-chat-hf + Meta-Llama-3-8B-Instruct + Phi-3 + ) + for model in "${llm_models[@]}"; do + start_service "${model}" + validate_microservice + stop_docker + done - stop_docker echo y | docker system prune } From 6d73a9f2b23002d56156c0de2f5eee53ef11229b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 4 Jul 2024 05:52:08 +0000 Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/llms/text-generation/tgi/README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md index 44a540313..c82e43e0e 100644 --- a/comps/llms/text-generation/tgi/README.md +++ b/comps/llms/text-generation/tgi/README.md @@ -114,11 +114,11 @@ curl http://${your_ip}:9000/v1/chat/completions \ ## 4. Validated Model -| Model | TGI | -| ------------------------- |------| -| Intel/neural-chat-7b-v3-3 | ✓ | -| Llama-2-7b-chat-hf | ✓ | -| Llama-2-70b-chat-hf | ✓ | -| Meta-Llama-3-8B-Instruct | ✓ | -| Meta-Llama-3-70B-Instruct | ✓ | -| Phi-3 | x | +| Model | TGI | +| ------------------------- | --- | +| Intel/neural-chat-7b-v3-3 | ✓ | +| Llama-2-7b-chat-hf | ✓ | +| Llama-2-70b-chat-hf | ✓ | +| Meta-Llama-3-8B-Instruct | ✓ | +| Meta-Llama-3-70B-Instruct | ✓ | +| Phi-3 | x | From dff0a9ac777851052d4501d8d1b400e9e5d800a7 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Thu, 4 Jul 2024 15:48:05 +0800 Subject: [PATCH 03/13] add overall system prune Signed-off-by: chensuyue --- .github/workflows/microservice-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/microservice-test.yml index 635025332..2324e2eff 100644 --- a/.github/workflows/microservice-test.yml +++ b/.github/workflows/microservice-test.yml @@ -56,7 +56,7 @@ jobs: run: | cid=$(docker ps -aq --filter "name=test-comps-*") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi - echo y | docker system prune + echo y | docker system prune --all - name: Publish pipeline artifact if: ${{ !cancelled() }} From a6cd5b2300a64b5fdbb0c4c04ca71c54baa2d00e Mon Sep 17 00:00:00 2001 From: chensuyue Date: Fri, 5 Jul 2024 09:26:36 +0800 Subject: [PATCH 04/13] update model name Signed-off-by: chensuyue --- .github/workflows/microservice-test.yml | 2 +- tests/test_llms_text-generation_tgi.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/microservice-test.yml b/.github/workflows/microservice-test.yml index 2324e2eff..d8000b93d 100644 --- a/.github/workflows/microservice-test.yml +++ b/.github/workflows/microservice-test.yml @@ -49,7 +49,7 @@ jobs: cd tests service=$(echo $service_path | tr '/' '_') echo "service=${service}" >> $GITHUB_ENV - if [ -f test_${service}.sh ]; then timeout 10m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi + if [ -f test_${service}.sh ]; then timeout 30m bash test_${service}.sh; else echo "Test script not found, skip test!"; fi - name: Clean up container if: cancelled() || failure() diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index bb3e33c37..d136b47f4 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -59,9 +59,9 @@ function main() { llm_models=( Intel/neural-chat-7b-v3-3 - Llama-2-7b-chat-hf - Meta-Llama-3-8B-Instruct - Phi-3 + meta-llama/Llama-2-7b-chat-hf + meta-llama/Meta-Llama-3-8B-Instruct + microsoft/Phi-3-mini-4k-instruct ) for model in "${llm_models[@]}"; do start_service "${model}" From 6a42265fcc90cf89939f2f310e7c1fba9659106d Mon Sep 17 00:00:00 2001 From: chensuyue Date: Fri, 5 Jul 2024 09:27:24 +0800 Subject: [PATCH 05/13] fix test scripts name Signed-off-by: chensuyue --- ...st_llm_summarization_tgi.sh => test_llms_summarization_tgi.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_llm_summarization_tgi.sh => test_llms_summarization_tgi.sh} (100%) diff --git a/tests/test_llm_summarization_tgi.sh b/tests/test_llms_summarization_tgi.sh similarity index 100% rename from tests/test_llm_summarization_tgi.sh rename to tests/test_llms_summarization_tgi.sh From f7bc29e3937e16acdb39025a46f590d0e1f06de7 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Fri, 5 Jul 2024 09:54:11 +0800 Subject: [PATCH 06/13] skip one model Signed-off-by: chensuyue --- tests/test_llms_text-generation_tgi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index d136b47f4..fb33b7215 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -59,7 +59,7 @@ function main() { llm_models=( Intel/neural-chat-7b-v3-3 - meta-llama/Llama-2-7b-chat-hf + # meta-llama/Llama-2-7b-chat-hf meta-llama/Meta-Llama-3-8B-Instruct microsoft/Phi-3-mini-4k-instruct ) From f81284d2e77d33e1b2346037602d56dcf5363021 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Fri, 5 Jul 2024 11:01:40 +0800 Subject: [PATCH 07/13] retest after update hf token Signed-off-by: chensuyue --- tests/test_llms_text-generation_tgi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index fb33b7215..d136b47f4 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -59,7 +59,7 @@ function main() { llm_models=( Intel/neural-chat-7b-v3-3 - # meta-llama/Llama-2-7b-chat-hf + meta-llama/Llama-2-7b-chat-hf meta-llama/Meta-Llama-3-8B-Instruct microsoft/Phi-3-mini-4k-instruct ) From 6edb92977da17155aad74e5755a9aa689fe79855 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Fri, 5 Jul 2024 17:03:37 +0800 Subject: [PATCH 08/13] for test Signed-off-by: chensuyue --- tests/test_llms_text-generation_tgi.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index d136b47f4..fbc25d093 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -58,9 +58,9 @@ function main() { build_docker_images llm_models=( - Intel/neural-chat-7b-v3-3 - meta-llama/Llama-2-7b-chat-hf - meta-llama/Meta-Llama-3-8B-Instruct +# Intel/neural-chat-7b-v3-3 +# meta-llama/Llama-2-7b-chat-hf +# meta-llama/Meta-Llama-3-8B-Instruct microsoft/Phi-3-mini-4k-instruct ) for model in "${llm_models[@]}"; do From 55ed322cfca9a6006bebb8bdd3b4e1818a070f9e Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 10:26:56 +0800 Subject: [PATCH 09/13] bug fix Signed-off-by: chensuyue --- tests/test_llms_text-generation_tgi.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index fbc25d093..8f26558cf 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -17,7 +17,7 @@ function start_service() { export your_hf_llm_model=$1 # Remember to set HF_TOKEN before invoking this test! export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} + docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048 export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" tei_service_port=5005 From 68c29e225ea50a9bb39ec9c5dc2a8109b1caac72 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 14:03:26 +0800 Subject: [PATCH 10/13] add more model for test Signed-off-by: chensuyue --- tests/test_llms_text-generation_tgi.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index 8f26558cf..6ce0563e2 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -58,9 +58,9 @@ function main() { build_docker_images llm_models=( -# Intel/neural-chat-7b-v3-3 -# meta-llama/Llama-2-7b-chat-hf -# meta-llama/Meta-Llama-3-8B-Instruct + Intel/neural-chat-7b-v3-3 + meta-llama/Llama-2-7b-chat-hf + meta-llama/Meta-Llama-3-8B-Instruct microsoft/Phi-3-mini-4k-instruct ) for model in "${llm_models[@]}"; do From 1924f7f97fe909fb613fb99412a1760405c68354 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 14:04:41 +0800 Subject: [PATCH 11/13] update readme Signed-off-by: chensuyue --- comps/llms/text-generation/tgi/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comps/llms/text-generation/tgi/README.md b/comps/llms/text-generation/tgi/README.md index c82e43e0e..6c9607ca9 100644 --- a/comps/llms/text-generation/tgi/README.md +++ b/comps/llms/text-generation/tgi/README.md @@ -121,4 +121,4 @@ curl http://${your_ip}:9000/v1/chat/completions \ | Llama-2-70b-chat-hf | ✓ | | Meta-Llama-3-8B-Instruct | ✓ | | Meta-Llama-3-70B-Instruct | ✓ | -| Phi-3 | x | +| Phi-3 | ✓ | From aa56a5e5b779c4b9da33dfd7619de7320891a78a Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 16:58:08 +0800 Subject: [PATCH 12/13] test after fix token Signed-off-by: chensuyue --- tests/test_llms_text-generation_tgi.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index 6ce0563e2..1dc778c3f 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -17,7 +17,7 @@ function start_service() { export your_hf_llm_model=$1 # Remember to set HF_TOKEN before invoking this test! export HF_TOKEN=${HF_TOKEN} - docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048 + docker run -d --name="test-comps-llm-tgi-endpoint" -p $tgi_endpoint_port:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HF_TOKEN} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id ${your_hf_llm_model} --max-input-tokens 1024 --max-total-tokens 2048 export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_endpoint_port}" tei_service_port=5005 @@ -58,10 +58,10 @@ function main() { build_docker_images llm_models=( - Intel/neural-chat-7b-v3-3 + # Intel/neural-chat-7b-v3-3 meta-llama/Llama-2-7b-chat-hf meta-llama/Meta-Llama-3-8B-Instruct - microsoft/Phi-3-mini-4k-instruct + # microsoft/Phi-3-mini-4k-instruct ) for model in "${llm_models[@]}"; do start_service "${model}" From fdc1612d7aeeea30fe741c10e76425b0a3d94063 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 22:27:18 +0800 Subject: [PATCH 13/13] add model for test Signed-off-by: chensuyue --- tests/test_llms_text-generation_tgi.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_llms_text-generation_tgi.sh b/tests/test_llms_text-generation_tgi.sh index 153974617..6b6c17c19 100644 --- a/tests/test_llms_text-generation_tgi.sh +++ b/tests/test_llms_text-generation_tgi.sh @@ -58,10 +58,10 @@ function main() { build_docker_images llm_models=( - # Intel/neural-chat-7b-v3-3 + Intel/neural-chat-7b-v3-3 meta-llama/Llama-2-7b-chat-hf meta-llama/Meta-Llama-3-8B-Instruct - # microsoft/Phi-3-mini-4k-instruct + microsoft/Phi-3-mini-4k-instruct ) for model in "${llm_models[@]}"; do start_service "${model}"