Add DocSum llm service manifests (#111)
DocSum manifest support.
Regenerate the CodeTrans yaml file.

Signed-off-by: Dolpher Du <[email protected]>
yongfengdu authored Jun 20, 2024
1 parent 18f53c9 commit 9ab8de0
Showing 12 changed files with 548 additions and 388 deletions.
62 changes: 61 additions & 1 deletion .github/workflows/scripts/e2e/manifest_test.sh
@@ -9,6 +9,16 @@ MOUNT_DIR=/home/$USER_ID/charts-mnt
# IMAGE_REPO is $OPEA_IMAGE_REPO, or else ""
IMAGE_REPO=${OPEA_IMAGE_REPO:-""}

function init_docsum() {
# executed under path manifests/DocSum/xeon
# replace the mount dir "path: /mnt" with "path: $MOUNT_DIR"
find . -name '*.yaml' -type f -exec sed -i "s#path: /mnt#path: $MOUNT_DIR#g" {} \;
# replace the repository "image: opea/*" with "image: ${IMAGE_REPO}opea/"
find . -name '*.yaml' -type f -exec sed -i "s#image: \"opea/*#image: \"${IMAGE_REPO}opea/#g" {} \;
# set huggingface token
find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}

function init_codetrans() {
# executed under path manifests/CodeTrans/xeon
# replace the mount dir "path: /mnt" with "path: $MOUNT_DIR"
@@ -29,6 +39,11 @@ function init_codegen() {
find . -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$(cat /home/$USER_ID/.cache/huggingface/token)#g" {} \;
}

function install_docsum {
echo "namespace is $NAMESPACE"
kubectl apply -f . -n $NAMESPACE
}

function install_codetrans {
echo "namespace is $NAMESPACE"
kubectl apply -f . -n $NAMESPACE
@@ -61,6 +76,35 @@ function install_chatqna {
kubectl apply -f chaqna-xeon-backend-server.yaml -n $NAMESPACE
}

function validate_docsum() {
ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
echo "try to curl http://${ip_address}:${port}/v1/chat/docsum..."
# Curl the DocSum LLM Service
curl http://${ip_address}:${port}/v1/chat/docsum \
-X POST \
-d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
-H 'Content-Type: application/json' > $LOG_PATH/curl_docsum.log
exit_code=$?
if [ $exit_code -ne 0 ]; then
echo "LLM for docsum failed, please check the logs in ${LOG_PATH}!"
exit 1
fi

echo "Checking response results, make sure the output is reasonable. "
local status=false
if [[ -f $LOG_PATH/curl_docsum.log ]] && \
[[ $(grep -c "TEI" $LOG_PATH/curl_docsum.log) != 0 ]]; then
status=true
fi

if [ $status == false ]; then
echo "Response check failed, please check the logs in artifacts!"
else
echo "Response check succeed!"
fi
}

function validate_codetrans() {
ip_address=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.clusterIP}')
port=$(kubectl get svc $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}')
@@ -158,6 +202,11 @@ if [ $# -eq 0 ]; then
fi

case "$1" in
init_docsum)
pushd manifests/DocSum/xeon
init_docsum
popd
;;
init_codetrans)
pushd manifests/CodeTrans/xeon
init_codetrans
@@ -173,6 +222,12 @@ case "$1" in
init_chatqna
popd
;;
install_docsum)
pushd manifests/DocSum/xeon
NAMESPACE=$2
install_docsum
popd
;;
install_codetrans)
pushd manifests/CodeTrans/xeon
NAMESPACE=$2
@@ -191,9 +246,14 @@ case "$1" in
install_chatqna
popd
;;
validate_docsum)
NAMESPACE=$2
SERVICE_NAME=docsum-llm-uservice
validate_docsum
;;
validate_codetrans)
NAMESPACE=$2
SERVICE_NAME=llm-llm-uservice
SERVICE_NAME=codetrans-llm-uservice
validate_codetrans
;;
validate_codegen)
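Taken together, the new hunks wire DocSum into the script's init/install/validate flow alongside CodeTrans and CodeGen. A minimal sketch of how the new subcommands might be driven end to end (the test namespace name here is an assumption, not part of this commit):

```
# Prepare the DocSum manifests: mount path, image repo, HF token
.github/workflows/scripts/e2e/manifest_test.sh init_docsum

# Deploy into a test namespace, then validate the LLM endpoint
.github/workflows/scripts/e2e/manifest_test.sh install_docsum docsum-test
.github/workflows/scripts/e2e/manifest_test.sh validate_docsum docsum-test
```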
1 change: 0 additions & 1 deletion helm-charts/common/llm-uservice/values.yaml
@@ -58,7 +58,6 @@ affinity: {}
tgi:
LLM_MODEL_ID: m-a-p/OpenCodeInterpreter-DS-6.7B
# LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
port: 80

image:
repository: ghcr.io/huggingface/text-generation-inference
23 changes: 18 additions & 5 deletions manifests/CodeTrans/README.md
@@ -3,13 +3,11 @@
> [NOTE]
> The following values must be set before you can deploy:
> HUGGINGFACEHUB_API_TOKEN
> You can also customize the "MODEL_ID" and "model-volume".
> The manifest llm.yaml is generated from the helm chart.
## Deploy On Xeon

```
cd GenAIExamples/CodeTrans/kubernetes/manifests/xeon
cd GenAIInfra/manifests/CodeTrans/xeon
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
kubectl apply -f llm.yaml
@@ -18,7 +16,7 @@ kubectl apply -f llm.yaml
## Deploy On Gaudi

```
cd GenAIExamples/CodeTrans/kubernetes/manifests/gaudi
cd GenAIInfra/manifests/CodeTrans/gaudi
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" llm.yaml
kubectl apply -f llm.yaml
@@ -30,8 +28,23 @@ Make sure all the pods are running, and restart the llm-xxxx pod if necessary.

```
kubectl get pods
curl http://llm-llm-uservice:9000/v1/chat/completions\
curl http://codetrans-llm-uservice:9000/v1/chat/completions\
-X POST \
-d '{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' \
-H 'Content-Type: application/json'
```
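
If you are checking from outside the cluster, where the service DNS name does not resolve, a port-forward is a handy alternative (a hedged sketch; run it in a separate terminal, then re-run the curl above against http://localhost:9000):

```
kubectl port-forward svc/codetrans-llm-uservice 9000:9000
```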

## Generate the llm file from helm chart

The llm.yaml file is generated automatically from the llm-uservice helm chart.

Here are the exact commands:

```
cd GenAIInfra/manifests/CodeTrans
export HF_TOKEN="insert-your-huggingface-token-here"
export MODELDIR="/mnt"
helm template codetrans ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" --values ../../helm-charts/common/llm-uservice/values.yaml > xeon/llm.yaml
helm template codetrans ../../helm-charts/common/llm-uservice --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set image.repository="opea/llm-tgi:latest" --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" --values ../../helm-charts/common/llm-uservice/gaudi-values.yaml > gaudi/llm.yaml
```
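
After regenerating, a client-side dry run is a cheap way to confirm the rendered manifests are still valid Kubernetes objects (a suggested check, not part of this commit):

```
kubectl apply --dry-run=client -f xeon/llm.yaml
kubectl apply --dry-run=client -f gaudi/llm.yaml
```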
56 changes: 39 additions & 17 deletions manifests/CodeTrans/gaudi/llm.yaml
@@ -6,11 +6,11 @@
apiVersion: v1
kind: Service
metadata:
name: llm-tgi
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
@@ -22,16 +22,20 @@ spec:
name: tgi
selector:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
---
# Source: llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
name: llm-llm-uservice
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
@@ -43,29 +47,33 @@ spec:
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
---
# Source: llm-uservice/charts/tgi/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-tgi
name: codetrans-tgi
labels:
helm.sh/chart: tgi-0.1.0
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "1.4"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
template:
metadata:
labels:
app.kubernetes.io/name: tgi
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
spec:
securityContext:
{}
@@ -76,6 +84,10 @@ spec:
value: HuggingFaceH4/mistral-7b-grok
- name: PORT
value: "80"
- name: HUGGING_FACE_HUB_TOKEN
value: "insert-your-huggingface-token-here"
- name: HF_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
value:
- name: https_proxy
@@ -102,35 +114,39 @@ spec:
path: /mnt
type: Directory
---
# Source: llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: llm-llm-uservice
name: codetrans-llm-uservice
labels:
helm.sh/chart: llm-uservice-0.1.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
app.kubernetes.io/version: "1.0.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: llm
app.kubernetes.io/instance: codetrans
spec:
securityContext:
{}
containers:
- name: llm
- name: codetrans
env:
- name: TGI_LLM_ENDPOINT
value: "http://llm-tgi:80"
value: "http://codetrans-tgi"
- name: HUGGINGFACEHUB_API_TOKEN
value: "insert-your-huggingface-token-here"
- name: http_proxy
@@ -139,6 +155,12 @@ spec:
value:
- name: no_proxy
value:
- name: LANGCHAIN_TRACING_V2
value: "false"
- name: LANGCHAIN_API_KEY
value: insert-your-langchain-key-here
- name: LANGCHAIN_PROJECT
value: "opea-llm-service"

securityContext:
{}
@@ -152,7 +174,7 @@ spec:
exec:
command:
- curl
- http://llm-tgi:80
- http://codetrans-tgi
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
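Because every object in the regenerated manifests now carries the codetrans instance label, rollout status is easy to watch with a selector (a sketch; the namespace value is an assumption):

```
kubectl get pods -l app.kubernetes.io/instance=codetrans -n $NAMESPACE
kubectl wait --for=condition=Ready pod \
  -l app.kubernetes.io/instance=codetrans -n $NAMESPACE --timeout=300s
```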