Add DocSum llm service manifests (#111)
DocSum manifest support.
Regenerate the CodeTrans yaml file.

Signed-off-by: Dolpher Du <[email protected]>
yongfengdu authored Jun 20, 2024
1 parent 18f53c9 commit 9ab8de0
Showing 12 changed files with 548 additions and 388 deletions.
62 changes: 61 additions & 1 deletion .github/workflows/scripts/e2e/manifest_test.sh
@@ -9,6 +9,16 @@ MOUNT_DIR=/home/$USER_ID/charts-mnt
# IMAGE_REPO is $OPEA_IMAGE_REPO, or else ""
IMAGE_REPO=${OPEA_IMAGE_REPO:-""}

function init_docsum() {
# executed under path manifests/DocSum/xeon
# replace the mount dir "path: /mnt" with "path: $MOUNT_DIR"
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt#path: $MOUNT_DIR#g" {} \;
# replace the repository "image: opea/*" with "image: ${IMAGE_REPO}opea/"
find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \;
# set huggingface token
find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}

function init_codetrans() {
# executed under path manifests/CodeTrans/xeon
# replace the mount dir "path: /mnt" with "path: $MOUNT_DIR"
@@ -29,6 +39,11 @@ function init_codegen() {
find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}

function install_docsum {
echo "namespace is $NAMESPACE"
kubectl apply -f . -n $NAMESPACE
}

function install_codetrans {
echo "namespace is $NAMESPACE"
kubectl apply -f . -n $NAMESPACE
@@ -61,6 +76,35 @@ function install_chatqna {
kubectl apply -f chaqna-xeon-backend-server.yaml -n $NAMESPACE
}

function validate_docsum() {
ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
echo "try to curl http://${ip_address}:${port}/v1/chat/docsum..."
# Curl the DocSum LLM Service
curl http://${ip_address}:${port}/v1/chat/docsum \
-X POST \
-d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
-H 'Content-Type: application/json' > $LOG_PATH/curl_docsum.log
exit_code=$?
if [ $exit_code -ne 0 ]; then
echo "LLM for docsum failed, please check the logs in ${LOG_PATH}!"
exit 1
fi

echo "Checking response results, make sure the output is reasonable. "
local status=false
if [[ -f $LOG_PATH/curl_docsum.log ]] && \
[[ $(grep -c "TEI" $LOG_PATH/curl_docsum.log) != 0 ]]; then
status=true
fi

if [ $status == false ]; then
echo "Response check failed, please check the logs in artifacts!"
else
echo "Response check succeed!"
fi
}

function validate_codetrans() {
ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
@@ -158,6 +202,11 @@ if [ $# -eq 0 ]; then
fi

case "$1" in
init_docsum)
pushd manifests/DocSum/xeon
init_docsum
popd
;;
init_codetrans)
pushd manifests/CodeTrans/xeon
init_codetrans
@@ -173,6 +222,12 @@ case "$1" in
init_chatqna
popd
;;
install_docsum)
pushd manifests/DocSum/xeon
NAMESPACE=$2
install_docsum
popd
;;
install_codetrans)
pushd manifests/CodeTrans/xeon
NAMESPACE=$2
@@ -191,9 +246,14 @@ case "$1" in
install_chatqna
popd
;;
validate_docsum)
NAMESPACE=$2
SERVICE_NAME=docsum-llm-uservice
validate_docsum
;;
validate_codetrans)
NAMESPACE=$2
SERVICE_NAME=llm-llm-uservice
SERVICE_NAME=codetrans-llm-uservice
validate_codetrans
;;
validate_codegen)
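Taken together, the new hunks wire DocSum into the script's init/install/validate flow alongside CodeTrans and CodeGen. A minimal sketch of how the new subcommands might be driven end to end (the test namespace name here is an assumption, not part of this commit):

```
# Prepare the DocSum manifests: mount path, image repo, HF token
.github/workflows/scripts/e2e/manifest_test.sh init_docsum

# Deploy into a test namespace, then validate the LLM endpoint
.github/workflows/scripts/e2e/manifest_test.sh install_docsum docsum-test
.github/workflows/scripts/e2e/manifest_test.sh validate_docsum docsum-test
```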
1 change: 0 additions & 1 deletion helm-charts/common/llm-uservice/values.yaml
@@ -58,7 +58,6 @@ affinity: {}
tgi:
LLM_MODEL_ID: m-a-p/OpenCodeInterpreter-DS-6.7B
# LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
port: 80

image:
repository: ghcr.io/huggingface/text-generation-inference
23 changes: 18 additions & 5 deletions manifests/CodeTrans/README.md
@@ -3,13 +3,11 @@
> [NOTE]
> The following values must be set before you can deploy:
> HUGGINGFACEHUB_API_TOKEN
> You can also customize the "MODEL_ID" and "model-volume".
> The manifest llm.yaml is generated from the helm chart.
## Deploy On Xeon

```
cd GenAIExamples/CodeTrans/kubernetes/manifests/xeon
cd GenAIInfra/manifests/CodeTrans/xeon
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
kubectl apply -f llm.yaml
@@ -18,7 +16,7 @@ kubectl apply -f llm.yaml
## Deploy On Gaudi

```
cd GenAIExamples/CodeTrans/kubernetes/manifests/gaudi
cd GenAIInfra/manifests/CodeTrans/gaudi
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
kubectl apply -f llm.yaml
@@ -30,8 +28,23 @@ Make sure all the pods are running, and restart the llm-xxxx pod if necessary.

```
kubectl get pods
curl http://llm-llm-uservice:9000/v1/chat/completions\
curl http://codetrans-llm-uservice:9000/v1/chat/completions\
-X POST \
-d '{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' \
-H 'Content-Type: application/json'
```
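
If you are checking from outside the cluster, where the service DNS name does not resolve, a port-forward is a handy alternative (a hedged sketch; run it in a separate terminal, then re-run the curl above against http://localhost:9000):

```
kubectl port-forward svc/codetrans-llm-uservice 9000:9000
```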

## Generate the llm file from helm chart

The llm.yaml file is generated automatically from the llm-uservice helm chart.

Here are the exact commands:

```
cd GenAIInfra/manifests/CodeTrans
export HF_TOKEN="insert-your-huggingface-token-here"
export MODELDIR="/mnt"
helm template codetrans ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" --values ../../helm-charts/common/llm-uservice/values.yaml > xeon/llm.yaml
helm template codetrans ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" --values ../../helm-charts/common/llm-uservice/gaudi-values.yaml > gaudi/llm.yaml
```
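
After regenerating, a client-side dry run is a cheap way to confirm the rendered manifests are still valid Kubernetes objects (a suggested check, not part of this commit):

```
kubectl apply --dry-run=client -f xeon/llm.yaml
kubectl apply --dry-run=client -f gaudi/llm.yaml
```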
56 changes: 39 additions & 17 deletions manifests/CodeTrans/gaudi/llm.yaml
@@ -6,11 +6,11 @@
apiVersion: v1
kind: Service
metadata:
name: llm-tgi
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
@@ -22,16 +22,20 @@ spec:
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
---
# Source: llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
name: llm-llm-uservice
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
@@ -43,29 +47,33 @@ spec:
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
---
# Source: llm-uservice/charts/tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-tgi
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
spec:
securityContext:
{}
@@ -76,6 +84,10 @@ spec:
value: HuggingFaceH4/mistral-7b-grok
- name: PORT
value: "80"
- name: HUGGING_FACE_HUB_TOKEN
value: "insert-your-huggingface-token-here"
- name: HF_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
value:
- name: https_proxy
@@ -102,35 +114,39 @@ spec:
path: /mnt
type: Directory
---
# Source: llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-llm-uservice
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
spec:
securityContext:
{}
containers:
- name: llm
- name: codetrans
env:
- name: TGI_LLM_ENDPOINT
value: "http://llm-tgi:80"
value: "http://codetrans-tgi"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
@@ -139,6 +155,12 @@ spec:
value:
- name: no_proxy
value:
- name: LANGCHAIN_TRACING_V2
value: "false"
- name: LANGCHAIN_API_KEY
value: insert-your-langchain-key-here
- name: LANGCHAIN_PROJECT
value: "opea-llm-service"

securityContext:
{}
@@ -152,7 +174,7 @@ spec:
exec:
command:
- curl
- http://llm-tgi:80
- http://codetrans-tgi
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
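Because every object in the regenerated manifests now carries the codetrans instance label, rollout status is easy to watch with a selector (a sketch; the namespace value is an assumption):

```
kubectl get pods -l app.kubernetes.io/instance=codetrans -n $NAMESPACE
kubectl wait --for=condition=Ready pod \
  -l app.kubernetes.io/instance=codetrans -n $NAMESPACE --timeout=300s
```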